loading
Generated 2026-02-20T02:52:38+00:00

All Files ( 0.06% covered at 0.0 hits/line )

198 files in total.
33221 relevant lines, 20 lines covered and 33201 lines missed. ( 0.06% )
45 total branches, 0 branches covered and 45 branches missed. ( 0.0% )
File % covered Lines Relevant Lines Lines covered Lines missed Avg. Hits / Line Branch Coverage Branches Covered branches Missed branches
app/channels/application_cable/channel.rb 0.00 % 4 4 0 4 0.00 100.00 % 0 0 0
app/channels/application_cable/connection.rb 0.00 % 23 19 0 19 0.00 100.00 % 0 0 0
app/channels/llm_comment_generation_channel.rb 0.00 % 15 12 0 12 0.00 100.00 % 0 0 0
app/channels/operations_channel.rb 0.00 % 18 15 0 15 0.00 100.00 % 0 0 0
app/controllers/admin/background_jobs_controller.rb 0.00 % 892 772 0 772 0.00 100.00 % 0 0 0
app/controllers/admin/base_controller.rb 0.00 % 23 17 0 17 0.00 100.00 % 0 0 0
app/controllers/admin/issues_controller.rb 0.00 % 194 169 0 169 0.00 100.00 % 0 0 0
app/controllers/admin/storage_ingestions_controller.rb 0.00 % 149 131 0 131 0.00 100.00 % 0 0 0
app/controllers/ai_dashboard_controller.rb 0.00 % 419 344 0 344 0.00 100.00 % 0 0 0
app/controllers/application_controller.rb 0.00 % 33 22 0 22 0.00 100.00 % 0 0 0
app/controllers/concerns/profile_post_preview_support.rb 0.00 % 51 41 0 41 0.00 100.00 % 0 0 0
app/controllers/feed_captures_controller.rb 0.00 % 40 37 0 37 0.00 100.00 % 0 0 0
app/controllers/follow_graph_syncs_controller.rb 0.00 % 39 37 0 37 0.00 100.00 % 0 0 0
app/controllers/instagram_accounts_controller.rb 0.00 % 357 319 0 319 0.00 100.00 % 0 0 0
app/controllers/instagram_posts_controller.rb 0.00 % 156 131 0 131 0.00 100.00 % 0 0 0
app/controllers/instagram_profile_actions_controller.rb 0.00 % 403 369 0 369 0.00 100.00 % 0 0 0
app/controllers/instagram_profile_messages_controller.rb 0.00 % 39 35 0 35 0.00 100.00 % 0 0 0
app/controllers/instagram_profile_posts_controller.rb 0.00 % 278 249 0 249 0.00 100.00 % 0 0 0
app/controllers/instagram_profiles_controller.rb 0.00 % 194 171 0 171 0.00 100.00 % 0 0 0
app/controllers/instagram_story_people_controller.rb 0.00 % 170 149 0 149 0.00 100.00 % 0 0 0
app/controllers/workspaces_controller.rb 0.00 % 40 31 0 31 0.00 100.00 % 0 0 0
app/helpers/ai_dashboard_helper.rb 22.22 % 18 9 2 7 0.22 0.00 % 6 0 6
app/helpers/application_helper.rb 14.55 % 117 55 8 47 0.15 0.00 % 39 0 39
app/helpers/dashboard_helper.rb 100.00 % 2 1 1 0 1.00 100.00 % 0 0 0
app/helpers/instagram_accounts_helper.rb 100.00 % 2 1 1 0 1.00 100.00 % 0 0 0
app/helpers/messages_helper.rb 100.00 % 2 1 1 0 1.00 100.00 % 0 0 0
app/helpers/syncs_helper.rb 100.00 % 2 1 1 0 1.00 100.00 % 0 0 0
app/jobs/analyze_captured_instagram_profile_posts_job.rb 0.00 % 196 174 0 174 0.00 100.00 % 0 0 0
app/jobs/analyze_instagram_post_job.rb 0.00 % 165 133 0 133 0.00 100.00 % 0 0 0
app/jobs/analyze_instagram_profile_job.rb 0.00 % 905 777 0 777 0.00 100.00 % 0 0 0
app/jobs/analyze_instagram_profile_post_job.rb 0.00 % 413 363 0 363 0.00 100.00 % 0 0 0
app/jobs/append_profile_history_narrative_job.rb 0.00 % 19 17 0 17 0.00 100.00 % 0 0 0
app/jobs/application_job.rb 0.00 % 239 210 0 210 0.00 100.00 % 0 0 0
app/jobs/auto_engage_home_feed_job.rb 0.00 % 31 29 0 29 0.00 100.00 % 0 0 0
app/jobs/build_instagram_profile_history_job.rb 0.00 % 424 380 0 380 0.00 100.00 % 0 0 0
app/jobs/capture_home_feed_job.rb 0.00 % 27 23 0 23 0.00 100.00 % 0 0 0
app/jobs/capture_instagram_profile_posts_job.rb 0.00 % 372 332 0 332 0.00 100.00 % 0 0 0
app/jobs/check_ai_microservice_health_job.rb 0.00 % 31 27 0 27 0.00 100.00 % 0 0 0
app/jobs/check_queue_health_job.rb 0.00 % 7 6 0 6 0.00 100.00 % 0 0 0
app/jobs/concerns/scheduled_account_batching.rb 0.00 % 46 37 0 37 0.00 100.00 % 0 0 0
app/jobs/download_instagram_post_media_job.rb 0.00 % 176 147 0 147 0.00 100.00 % 0 0 0
app/jobs/download_instagram_profile_avatar_job.rb 0.00 % 203 167 0 167 0.00 100.00 % 0 0 0
app/jobs/download_instagram_profile_post_media_job.rb 0.00 % 667 577 0 577 0.00 100.00 % 0 0 0
app/jobs/download_missing_avatars_job.rb 0.00 % 41 36 0 36 0.00 100.00 % 0 0 0
app/jobs/enqueue_avatar_sync_for_all_accounts_job.rb 0.00 % 52 45 0 45 0.00 100.00 % 0 0 0
app/jobs/enqueue_continuous_account_processing_job.rb 0.00 % 88 79 0 79 0.00 100.00 % 0 0 0
app/jobs/enqueue_feed_auto_engagement_for_all_accounts_job.rb 0.00 % 86 77 0 77 0.00 100.00 % 0 0 0
app/jobs/enqueue_follow_graph_sync_for_all_accounts_job.rb 0.00 % 46 38 0 38 0.00 100.00 % 0 0 0
app/jobs/enqueue_profile_refresh_for_all_accounts_job.rb 0.00 % 52 45 0 45 0.00 100.00 % 0 0 0
app/jobs/enqueue_recent_profile_post_scans_for_account_job.rb 0.00 % 205 176 0 176 0.00 100.00 % 0 0 0
app/jobs/enqueue_recent_profile_post_scans_for_all_accounts_job.rb 0.00 % 87 77 0 77 0.00 100.00 % 0 0 0
app/jobs/enqueue_story_auto_replies_for_all_accounts_job.rb 0.00 % 88 79 0 79 0.00 100.00 % 0 0 0
app/jobs/fetch_instagram_profile_details_job.rb 0.00 % 148 129 0 129 0.00 100.00 % 0 0 0
app/jobs/finalize_post_analysis_pipeline_job.rb 0.00 % 357 308 0 308 0.00 100.00 % 0 0 0
app/jobs/generate_llm_comment_job.rb 0.00 % 204 182 0 182 0.00 100.00 % 0 0 0
app/jobs/generate_profile_post_preview_image_job.rb 0.00 % 36 30 0 30 0.00 100.00 % 0 0 0
app/jobs/generate_story_preview_image_job.rb 0.00 % 28 22 0 22 0.00 100.00 % 0 0 0
app/jobs/post_analysis_pipeline_job.rb 0.00 % 38 34 0 34 0.00 100.00 % 0 0 0
app/jobs/post_instagram_profile_comment_job.rb 0.00 % 51 46 0 46 0.00 100.00 % 0 0 0
app/jobs/process_instagram_account_continuously_job.rb 0.00 % 172 146 0 146 0.00 100.00 % 0 0 0
app/jobs/process_post_face_analysis_job.rb 0.00 % 86 77 0 77 0.00 100.00 % 0 0 0
app/jobs/process_post_metadata_tagging_job.rb 0.00 % 257 231 0 231 0.00 100.00 % 0 0 0
app/jobs/process_post_ocr_analysis_job.rb 0.00 % 208 183 0 183 0.00 100.00 % 0 0 0
app/jobs/process_post_video_analysis_job.rb 0.00 % 261 229 0 229 0.00 100.00 % 0 0 0
app/jobs/process_post_visual_analysis_job.rb 0.00 % 210 186 0 186 0.00 100.00 % 0 0 0
app/jobs/purge_expired_instagram_post_media_job.rb 0.00 % 25 21 0 21 0.00 100.00 % 0 0 0
app/jobs/refresh_account_audit_logs_job.rb 0.00 % 42 35 0 35 0.00 100.00 % 0 0 0
app/jobs/refresh_profile_post_face_identity_job.rb 0.00 % 85 74 0 74 0.00 100.00 % 0 0 0
app/jobs/retry_failed_background_jobs_job.rb 0.00 % 19 16 0 16 0.00 100.00 % 0 0 0
app/jobs/send_instagram_message_job.rb 0.00 % 39 31 0 31 0.00 100.00 % 0 0 0
app/jobs/story_processing_job.rb 0.00 % 8 7 0 7 0.00 100.00 % 0 0 0
app/jobs/sync_all_home_stories_job.rb 0.00 % 64 53 0 53 0.00 100.00 % 0 0 0
app/jobs/sync_follow_graph_job.rb 0.00 % 56 46 0 46 0.00 100.00 % 0 0 0
app/jobs/sync_home_story_carousel_job.rb 0.00 % 48 42 0 42 0.00 100.00 % 0 0 0
app/jobs/sync_instagram_profile_stories_job.rb 0.00 % 1128 983 0 983 0.00 100.00 % 0 0 0
app/jobs/sync_next_profiles_for_account_job.rb 0.00 % 48 43 0 43 0.00 100.00 % 0 0 0
app/jobs/sync_profile_stories_for_account_job.rb 0.00 % 83 76 0 76 0.00 100.00 % 0 0 0
app/jobs/sync_recent_profile_posts_for_profile_job.rb 0.00 % 351 307 0 307 0.00 100.00 % 0 0 0
app/jobs/verify_instagram_messageability_job.rb 0.00 % 63 57 0 57 0.00 100.00 % 0 0 0
app/jobs/workspace_process_actions_todo_post_job.rb 0.00 % 517 450 0 450 0.00 100.00 % 0 0 0
app/mailers/application_mailer.rb 0.00 % 4 4 0 4 0.00 100.00 % 0 0 0
app/models/active_storage_ingestion.rb 0.00 % 82 70 0 70 0.00 100.00 % 0 0 0
app/models/ai_analysis.rb 0.00 % 26 23 0 23 0.00 100.00 % 0 0 0
app/models/ai_api_call.rb 0.00 % 15 12 0 12 0.00 100.00 % 0 0 0
app/models/ai_provider_setting.rb 0.00 % 54 43 0 43 0.00 100.00 % 0 0 0
app/models/app_issue.rb 0.00 % 64 54 0 54 0.00 100.00 % 0 0 0
app/models/application_record.rb 0.00 % 3 3 0 3 0.00 100.00 % 0 0 0
app/models/background_job_failure.rb 0.00 % 47 38 0 38 0.00 100.00 % 0 0 0
app/models/concerns/active_storage_ingestion_tracking.rb 85.71 % 13 7 6 1 0.86 100.00 % 0 0 0
app/models/conversation_peer.rb 0.00 % 5 4 0 4 0.00 100.00 % 0 0 0
app/models/current.rb 0.00 % 19 18 0 18 0.00 100.00 % 0 0 0
app/models/instagram_account.rb 0.00 % 134 109 0 109 0.00 100.00 % 0 0 0
app/models/instagram_message.rb 0.00 % 21 15 0 15 0.00 100.00 % 0 0 0
app/models/instagram_post.rb 0.00 % 31 25 0 25 0.00 100.00 % 0 0 0
app/models/instagram_post_entity.rb 0.00 % 10 8 0 8 0.00 100.00 % 0 0 0
app/models/instagram_post_face.rb 0.00 % 8 6 0 6 0.00 100.00 % 0 0 0
app/models/instagram_post_insight.rb 0.00 % 9 7 0 7 0.00 100.00 % 0 0 0
app/models/instagram_profile.rb 0.00 % 109 92 0 92 0.00 100.00 % 0 0 0
app/models/instagram_profile_action_log.rb 0.00 % 76 62 0 62 0.00 100.00 % 0 0 0
app/models/instagram_profile_analysis.rb 0.00 % 13 8 0 8 0.00 100.00 % 0 0 0
app/models/instagram_profile_behavior_profile.rb 0.00 % 3 3 0 3 0.00 100.00 % 0 0 0
app/models/instagram_profile_event.rb 0.00 % 1661 1485 0 1485 0.00 100.00 % 0 0 0
app/models/instagram_profile_history_chunk.rb 0.00 % 11 9 0 9 0.00 100.00 % 0 0 0
app/models/instagram_profile_insight.rb 0.00 % 12 9 0 9 0.00 100.00 % 0 0 0
app/models/instagram_profile_message_strategy.rb 0.00 % 8 7 0 7 0.00 100.00 % 0 0 0
app/models/instagram_profile_post.rb 0.00 % 23 18 0 18 0.00 100.00 % 0 0 0
app/models/instagram_profile_post_comment.rb 0.00 % 8 6 0 6 0.00 100.00 % 0 0 0
app/models/instagram_profile_signal_evidence.rb 0.00 % 10 8 0 8 0.00 100.00 % 0 0 0
app/models/instagram_profile_tagging.rb 0.00 % 7 5 0 5 0.00 100.00 % 0 0 0
app/models/instagram_story.rb 0.00 % 22 17 0 17 0.00 100.00 % 0 0 0
app/models/instagram_story_face.rb 0.00 % 8 6 0 6 0.00 100.00 % 0 0 0
app/models/instagram_story_person.rb 0.00 % 82 63 0 63 0.00 100.00 % 0 0 0
app/models/profile_tag.rb 0.00 % 11 8 0 8 0.00 100.00 % 0 0 0
app/models/sync_run.rb 0.00 % 19 14 0 14 0.00 100.00 % 0 0 0
app/services/ai/api_usage_tracker.rb 0.00 % 94 83 0 83 0.00 100.00 % 0 0 0
app/services/ai/comment_relevance_scorer.rb 0.00 % 66 55 0 55 0.00 100.00 % 0 0 0
app/services/ai/insight_sync.rb 0.00 % 224 196 0 196 0.00 100.00 % 0 0 0
app/services/ai/local_engagement_comment_generator.rb 0.00 % 513 462 0 462 0.00 100.00 % 0 0 0
app/services/ai/local_microservice_client.rb 0.00 % 727 597 0 597 0.00 100.00 % 0 0 0
app/services/ai/ollama_client.rb 0.00 % 134 109 0 109 0.00 100.00 % 0 0 0
app/services/ai/post_analysis_context_builder.rb 0.00 % 293 258 0 258 0.00 100.00 % 0 0 0
app/services/ai/post_analysis_pipeline_state.rb 0.00 % 288 239 0 239 0.00 100.00 % 0 0 0
app/services/ai/post_analyzer.rb 0.00 % 89 74 0 74 0.00 100.00 % 0 0 0
app/services/ai/post_comment_generation_service.rb 0.00 % 439 382 0 382 0.00 100.00 % 0 0 0
app/services/ai/post_ocr_service.rb 0.00 % 91 79 0 79 0.00 100.00 % 0 0 0
app/services/ai/profile_analyzer.rb 0.00 % 92 77 0 77 0.00 100.00 % 0 0 0
app/services/ai/profile_auto_tagger.rb 0.00 % 54 45 0 45 0.00 100.00 % 0 0 0
app/services/ai/profile_comment_preparation_service.rb 0.00 % 372 329 0 329 0.00 100.00 % 0 0 0
app/services/ai/profile_demographics_aggregator.rb 0.00 % 213 178 0 178 0.00 100.00 % 0 0 0
app/services/ai/profile_history_build_service.rb 0.00 % 871 782 0 782 0.00 100.00 % 0 0 0
app/services/ai/profile_history_narrative_builder.rb 0.00 % 184 158 0 158 0.00 100.00 % 0 0 0
app/services/ai/provider_registry.rb 0.00 % 57 48 0 48 0.00 100.00 % 0 0 0
app/services/ai/providers/base_provider.rb 0.00 % 69 52 0 52 0.00 100.00 % 0 0 0
app/services/ai/providers/local_provider.rb 0.00 % 784 689 0 689 0.00 100.00 % 0 0 0
app/services/ai/runner.rb 0.00 % 349 289 0 289 0.00 100.00 % 0 0 0
app/services/ai/verified_story_insight_builder.rb 0.00 % 685 610 0 610 0.00 100.00 % 0 0 0
app/services/face_detection_service.rb 0.00 % 368 321 0 321 0.00 100.00 % 0 0 0
app/services/face_embedding_service.rb 0.00 % 93 77 0 77 0.00 100.00 % 0 0 0
app/services/face_identity_resolution_service.rb 0.00 % 629 526 0 526 0.00 100.00 % 0 0 0
app/services/instagram/authentication_required_error.rb 0.00 % 4 4 0 4 0.00 100.00 % 0 0 0
app/services/instagram/avatar_url_normalizer.rb 0.00 % 45 37 0 37 0.00 100.00 % 0 0 0
app/services/instagram/client.rb 0.00 % 7341 6278 0 6278 0.00 100.00 % 0 0 0
app/services/instagram/client/bulk_message_send_service.rb 0.00 % 114 102 0 102 0.00 100.00 % 0 0 0
app/services/instagram/client/profile_analysis_dataset_service.rb 0.00 % 65 57 0 57 0.00 100.00 % 0 0 0
app/services/instagram/client/session_validation_service.rb 0.00 % 138 116 0 116 0.00 100.00 % 0 0 0
app/services/instagram/client/single_message_send_service.rb 0.00 % 97 89 0 89 0.00 100.00 % 0 0 0
app/services/instagram/client/sync_data_service.rb 0.00 % 73 64 0 64 0.00 100.00 % 0 0 0
app/services/instagram/client/sync_follow_graph_service.rb 0.00 % 102 86 0 86 0.00 100.00 % 0 0 0
app/services/instagram/profile_analysis_collector.rb 0.00 % 524 443 0 443 0.00 100.00 % 0 0 0
app/services/instagram/profile_scan_policy.rb 0.00 % 245 209 0 209 0.00 100.00 % 0 0 0
app/services/instagram_accounts/dashboard_snapshot_service.rb 0.00 % 69 62 0 62 0.00 100.00 % 0 0 0
app/services/instagram_accounts/llm_comment_request_service.rb 0.00 % 141 123 0 123 0.00 100.00 % 0 0 0
app/services/instagram_accounts/llm_queue_inspector.rb 0.00 % 67 55 0 55 0.00 100.00 % 0 0 0
app/services/instagram_accounts/skip_diagnostics_service.rb 0.00 % 76 65 0 65 0.00 100.00 % 0 0 0
app/services/instagram_accounts/story_archive_item_serializer.rb 0.00 % 129 112 0 112 0.00 100.00 % 0 0 0
app/services/instagram_accounts/story_archive_query.rb 0.00 % 73 61 0 61 0.00 100.00 % 0 0 0
app/services/instagram_accounts/technical_details_payload_service.rb 0.00 % 73 62 0 62 0.00 100.00 % 0 0 0
app/services/instagram_profiles/events_query.rb 0.00 % 89 72 0 72 0.00 100.00 % 0 0 0
app/services/instagram_profiles/mutual_friends_resolver.rb 0.00 % 47 40 0 40 0.00 100.00 % 0 0 0
app/services/instagram_profiles/profiles_index_query.rb 0.00 % 173 153 0 153 0.00 100.00 % 0 0 0
app/services/instagram_profiles/show_snapshot_service.rb 0.00 % 73 63 0 63 0.00 100.00 % 0 0 0
app/services/instagram_profiles/tabulator_events_payload_builder.rb 0.00 % 71 59 0 59 0.00 100.00 % 0 0 0
app/services/instagram_profiles/tabulator_params.rb 0.00 % 70 59 0 59 0.00 100.00 % 0 0 0
app/services/instagram_profiles/tabulator_profiles_payload_builder.rb 0.00 % 48 43 0 43 0.00 100.00 % 0 0 0
app/services/jobs/context_extractor.rb 0.00 % 92 77 0 77 0.00 100.00 % 0 0 0
app/services/jobs/failure_retry.rb 0.00 % 215 172 0 172 0.00 100.00 % 0 0 0
app/services/messaging/integration_service.rb 0.00 % 45 38 0 38 0.00 100.00 % 0 0 0
app/services/ops/account_issues.rb 0.00 % 44 37 0 37 0.00 100.00 % 0 0 0
app/services/ops/audit_log_builder.rb 0.00 % 64 60 0 60 0.00 100.00 % 0 0 0
app/services/ops/issue_tracker.rb 0.00 % 150 132 0 132 0.00 100.00 % 0 0 0
app/services/ops/live_update_broadcaster.rb 0.00 % 56 46 0 46 0.00 100.00 % 0 0 0
app/services/ops/local_ai_health.rb 0.00 % 130 110 0 110 0.00 100.00 % 0 0 0
app/services/ops/local_story_intelligence_backfill.rb 0.00 % 188 160 0 160 0.00 100.00 % 0 0 0
app/services/ops/metrics.rb 0.00 % 179 164 0 164 0.00 100.00 % 0 0 0
app/services/ops/queue_health.rb 0.00 % 49 43 0 43 0.00 100.00 % 0 0 0
app/services/ops/resource_guard.rb 0.00 % 130 111 0 111 0.00 100.00 % 0 0 0
app/services/ops/structured_logger.rb 0.00 % 41 34 0 34 0.00 100.00 % 0 0 0
app/services/person_identity_feedback_service.rb 0.00 % 353 298 0 298 0.00 100.00 % 0 0 0
app/services/personalization_engine.rb 0.00 % 47 39 0 39 0.00 100.00 % 0 0 0
app/services/pipeline/account_processing_coordinator.rb 0.00 % 244 211 0 211 0.00 100.00 % 0 0 0
app/services/post_face_recognition_service.rb 0.00 % 389 356 0 356 0.00 100.00 % 0 0 0
app/services/post_video_context_extraction_service.rb 0.00 % 391 356 0 356 0.00 100.00 % 0 0 0
app/services/response_generation_service.rb 0.00 % 58 49 0 49 0.00 100.00 % 0 0 0
app/services/speech_transcription_service.rb 0.00 % 136 112 0 112 0.00 100.00 % 0 0 0
app/services/story_archive/media_preview_resolver.rb 0.00 % 45 37 0 37 0.00 100.00 % 0 0 0
app/services/story_content_understanding_service.rb 0.00 % 67 57 0 57 0.00 100.00 % 0 0 0
app/services/story_ingestion_service.rb 0.00 % 96 83 0 83 0.00 100.00 % 0 0 0
app/services/story_processing_service.rb 0.00 % 486 433 0 433 0.00 100.00 % 0 0 0
app/services/user_profile_builder_service.rb 0.00 % 101 87 0 87 0.00 100.00 % 0 0 0
app/services/vector_matching_service.rb 0.00 % 210 176 0 176 0.00 100.00 % 0 0 0
app/services/video_audio_extraction_service.rb 0.00 % 74 62 0 62 0.00 100.00 % 0 0 0
app/services/video_frame_change_detector_service.rb 0.00 % 230 203 0 203 0.00 100.00 % 0 0 0
app/services/video_frame_extraction_service.rb 0.00 % 88 74 0 74 0.00 100.00 % 0 0 0
app/services/video_metadata_service.rb 0.00 % 95 82 0 82 0.00 100.00 % 0 0 0
app/services/video_thumbnail_service.rb 0.00 % 103 87 0 87 0.00 100.00 % 0 0 0
app/services/workspace/actions_todo_queue_service.rb 0.00 % 266 227 0 227 0.00 100.00 % 0 0 0
lib/tasks/story_debug_analyzer.rb 0.00 % 182 130 0 130 0.00 100.00 % 0 0 0
lib/tasks/story_network_analyzer.rb 0.00 % 178 146 0 146 0.00 100.00 % 0 0 0

Controllers ( 0.0% covered at 0.0 hits/line )

17 files in total.
3024 relevant lines, 0 lines covered and 3024 lines missed. ( 0.0% )
0 total branches, 0 branches covered and 0 branches missed. ( 100.0% )
File % covered Lines Relevant Lines Lines covered Lines missed Avg. Hits / Line Branch Coverage Branches Covered branches Missed branches
app/controllers/admin/background_jobs_controller.rb 0.00 % 892 772 0 772 0.00 100.00 % 0 0 0
app/controllers/admin/base_controller.rb 0.00 % 23 17 0 17 0.00 100.00 % 0 0 0
app/controllers/admin/issues_controller.rb 0.00 % 194 169 0 169 0.00 100.00 % 0 0 0
app/controllers/admin/storage_ingestions_controller.rb 0.00 % 149 131 0 131 0.00 100.00 % 0 0 0
app/controllers/ai_dashboard_controller.rb 0.00 % 419 344 0 344 0.00 100.00 % 0 0 0
app/controllers/application_controller.rb 0.00 % 33 22 0 22 0.00 100.00 % 0 0 0
app/controllers/concerns/profile_post_preview_support.rb 0.00 % 51 41 0 41 0.00 100.00 % 0 0 0
app/controllers/feed_captures_controller.rb 0.00 % 40 37 0 37 0.00 100.00 % 0 0 0
app/controllers/follow_graph_syncs_controller.rb 0.00 % 39 37 0 37 0.00 100.00 % 0 0 0
app/controllers/instagram_accounts_controller.rb 0.00 % 357 319 0 319 0.00 100.00 % 0 0 0
app/controllers/instagram_posts_controller.rb 0.00 % 156 131 0 131 0.00 100.00 % 0 0 0
app/controllers/instagram_profile_actions_controller.rb 0.00 % 403 369 0 369 0.00 100.00 % 0 0 0
app/controllers/instagram_profile_messages_controller.rb 0.00 % 39 35 0 35 0.00 100.00 % 0 0 0
app/controllers/instagram_profile_posts_controller.rb 0.00 % 278 249 0 249 0.00 100.00 % 0 0 0
app/controllers/instagram_profiles_controller.rb 0.00 % 194 171 0 171 0.00 100.00 % 0 0 0
app/controllers/instagram_story_people_controller.rb 0.00 % 170 149 0 149 0.00 100.00 % 0 0 0
app/controllers/workspaces_controller.rb 0.00 % 40 31 0 31 0.00 100.00 % 0 0 0

Channels ( 0.0% covered at 0.0 hits/line )

4 files in total.
50 relevant lines, 0 lines covered and 50 lines missed. ( 0.0% )
0 total branches, 0 branches covered and 0 branches missed. ( 100.0% )
File % covered Lines Relevant Lines Lines covered Lines missed Avg. Hits / Line Branch Coverage Branches Covered branches Missed branches
app/channels/application_cable/channel.rb 0.00 % 4 4 0 4 0.00 100.00 % 0 0 0
app/channels/application_cable/connection.rb 0.00 % 23 19 0 19 0.00 100.00 % 0 0 0
app/channels/llm_comment_generation_channel.rb 0.00 % 15 12 0 12 0.00 100.00 % 0 0 0
app/channels/operations_channel.rb 0.00 % 18 15 0 15 0.00 100.00 % 0 0 0

Models ( 0.27% covered at 0.0 hits/line )

33 files in total.
2262 relevant lines, 6 lines covered and 2256 lines missed. ( 0.27% )
0 total branches, 0 branches covered and 0 branches missed. ( 100.0% )
File % covered Lines Relevant Lines Lines covered Lines missed Avg. Hits / Line Branch Coverage Branches Covered branches Missed branches
app/models/active_storage_ingestion.rb 0.00 % 82 70 0 70 0.00 100.00 % 0 0 0
app/models/ai_analysis.rb 0.00 % 26 23 0 23 0.00 100.00 % 0 0 0
app/models/ai_api_call.rb 0.00 % 15 12 0 12 0.00 100.00 % 0 0 0
app/models/ai_provider_setting.rb 0.00 % 54 43 0 43 0.00 100.00 % 0 0 0
app/models/app_issue.rb 0.00 % 64 54 0 54 0.00 100.00 % 0 0 0
app/models/application_record.rb 0.00 % 3 3 0 3 0.00 100.00 % 0 0 0
app/models/background_job_failure.rb 0.00 % 47 38 0 38 0.00 100.00 % 0 0 0
app/models/concerns/active_storage_ingestion_tracking.rb 85.71 % 13 7 6 1 0.86 100.00 % 0 0 0
app/models/conversation_peer.rb 0.00 % 5 4 0 4 0.00 100.00 % 0 0 0
app/models/current.rb 0.00 % 19 18 0 18 0.00 100.00 % 0 0 0
app/models/instagram_account.rb 0.00 % 134 109 0 109 0.00 100.00 % 0 0 0
app/models/instagram_message.rb 0.00 % 21 15 0 15 0.00 100.00 % 0 0 0
app/models/instagram_post.rb 0.00 % 31 25 0 25 0.00 100.00 % 0 0 0
app/models/instagram_post_entity.rb 0.00 % 10 8 0 8 0.00 100.00 % 0 0 0
app/models/instagram_post_face.rb 0.00 % 8 6 0 6 0.00 100.00 % 0 0 0
app/models/instagram_post_insight.rb 0.00 % 9 7 0 7 0.00 100.00 % 0 0 0
app/models/instagram_profile.rb 0.00 % 109 92 0 92 0.00 100.00 % 0 0 0
app/models/instagram_profile_action_log.rb 0.00 % 76 62 0 62 0.00 100.00 % 0 0 0
app/models/instagram_profile_analysis.rb 0.00 % 13 8 0 8 0.00 100.00 % 0 0 0
app/models/instagram_profile_behavior_profile.rb 0.00 % 3 3 0 3 0.00 100.00 % 0 0 0
app/models/instagram_profile_event.rb 0.00 % 1661 1485 0 1485 0.00 100.00 % 0 0 0
app/models/instagram_profile_history_chunk.rb 0.00 % 11 9 0 9 0.00 100.00 % 0 0 0
app/models/instagram_profile_insight.rb 0.00 % 12 9 0 9 0.00 100.00 % 0 0 0
app/models/instagram_profile_message_strategy.rb 0.00 % 8 7 0 7 0.00 100.00 % 0 0 0
app/models/instagram_profile_post.rb 0.00 % 23 18 0 18 0.00 100.00 % 0 0 0
app/models/instagram_profile_post_comment.rb 0.00 % 8 6 0 6 0.00 100.00 % 0 0 0
app/models/instagram_profile_signal_evidence.rb 0.00 % 10 8 0 8 0.00 100.00 % 0 0 0
app/models/instagram_profile_tagging.rb 0.00 % 7 5 0 5 0.00 100.00 % 0 0 0
app/models/instagram_story.rb 0.00 % 22 17 0 17 0.00 100.00 % 0 0 0
app/models/instagram_story_face.rb 0.00 % 8 6 0 6 0.00 100.00 % 0 0 0
app/models/instagram_story_person.rb 0.00 % 82 63 0 63 0.00 100.00 % 0 0 0
app/models/profile_tag.rb 0.00 % 11 8 0 8 0.00 100.00 % 0 0 0
app/models/sync_run.rb 0.00 % 19 14 0 14 0.00 100.00 % 0 0 0

Mailers ( 0.0% covered at 0.0 hits/line )

1 files in total.
4 relevant lines, 0 lines covered and 4 lines missed. ( 0.0% )
0 total branches, 0 branches covered and 0 branches missed. ( 100.0% )
File % covered Lines Relevant Lines Lines covered Lines missed Avg. Hits / Line Branch Coverage Branches Covered branches Missed branches
app/mailers/application_mailer.rb 0.00 % 4 4 0 4 0.00 100.00 % 0 0 0

Helpers ( 20.59% covered at 0.21 hits/line )

6 files in total.
68 relevant lines, 14 lines covered and 54 lines missed. ( 20.59% )
45 total branches, 0 branches covered and 45 branches missed. ( 0.0% )
File % covered Lines Relevant Lines Lines covered Lines missed Avg. Hits / Line Branch Coverage Branches Covered branches Missed branches
app/helpers/ai_dashboard_helper.rb 22.22 % 18 9 2 7 0.22 0.00 % 6 0 6
app/helpers/application_helper.rb 14.55 % 117 55 8 47 0.15 0.00 % 39 0 39
app/helpers/dashboard_helper.rb 100.00 % 2 1 1 0 1.00 100.00 % 0 0 0
app/helpers/instagram_accounts_helper.rb 100.00 % 2 1 1 0 1.00 100.00 % 0 0 0
app/helpers/messages_helper.rb 100.00 % 2 1 1 0 1.00 100.00 % 0 0 0
app/helpers/syncs_helper.rb 100.00 % 2 1 1 0 1.00 100.00 % 0 0 0

Jobs ( 0.0% covered at 0.0 hits/line )

53 files in total.
8095 relevant lines, 0 lines covered and 8095 lines missed. ( 0.0% )
0 total branches, 0 branches covered and 0 branches missed. ( 100.0% )
File % covered Lines Relevant Lines Lines covered Lines missed Avg. Hits / Line Branch Coverage Branches Covered branches Missed branches
app/jobs/analyze_captured_instagram_profile_posts_job.rb 0.00 % 196 174 0 174 0.00 100.00 % 0 0 0
app/jobs/analyze_instagram_post_job.rb 0.00 % 165 133 0 133 0.00 100.00 % 0 0 0
app/jobs/analyze_instagram_profile_job.rb 0.00 % 905 777 0 777 0.00 100.00 % 0 0 0
app/jobs/analyze_instagram_profile_post_job.rb 0.00 % 413 363 0 363 0.00 100.00 % 0 0 0
app/jobs/append_profile_history_narrative_job.rb 0.00 % 19 17 0 17 0.00 100.00 % 0 0 0
app/jobs/application_job.rb 0.00 % 239 210 0 210 0.00 100.00 % 0 0 0
app/jobs/auto_engage_home_feed_job.rb 0.00 % 31 29 0 29 0.00 100.00 % 0 0 0
app/jobs/build_instagram_profile_history_job.rb 0.00 % 424 380 0 380 0.00 100.00 % 0 0 0
app/jobs/capture_home_feed_job.rb 0.00 % 27 23 0 23 0.00 100.00 % 0 0 0
app/jobs/capture_instagram_profile_posts_job.rb 0.00 % 372 332 0 332 0.00 100.00 % 0 0 0
app/jobs/check_ai_microservice_health_job.rb 0.00 % 31 27 0 27 0.00 100.00 % 0 0 0
app/jobs/check_queue_health_job.rb 0.00 % 7 6 0 6 0.00 100.00 % 0 0 0
app/jobs/concerns/scheduled_account_batching.rb 0.00 % 46 37 0 37 0.00 100.00 % 0 0 0
app/jobs/download_instagram_post_media_job.rb 0.00 % 176 147 0 147 0.00 100.00 % 0 0 0
app/jobs/download_instagram_profile_avatar_job.rb 0.00 % 203 167 0 167 0.00 100.00 % 0 0 0
app/jobs/download_instagram_profile_post_media_job.rb 0.00 % 667 577 0 577 0.00 100.00 % 0 0 0
app/jobs/download_missing_avatars_job.rb 0.00 % 41 36 0 36 0.00 100.00 % 0 0 0
app/jobs/enqueue_avatar_sync_for_all_accounts_job.rb 0.00 % 52 45 0 45 0.00 100.00 % 0 0 0
app/jobs/enqueue_continuous_account_processing_job.rb 0.00 % 88 79 0 79 0.00 100.00 % 0 0 0
app/jobs/enqueue_feed_auto_engagement_for_all_accounts_job.rb 0.00 % 86 77 0 77 0.00 100.00 % 0 0 0
app/jobs/enqueue_follow_graph_sync_for_all_accounts_job.rb 0.00 % 46 38 0 38 0.00 100.00 % 0 0 0
app/jobs/enqueue_profile_refresh_for_all_accounts_job.rb 0.00 % 52 45 0 45 0.00 100.00 % 0 0 0
app/jobs/enqueue_recent_profile_post_scans_for_account_job.rb 0.00 % 205 176 0 176 0.00 100.00 % 0 0 0
app/jobs/enqueue_recent_profile_post_scans_for_all_accounts_job.rb 0.00 % 87 77 0 77 0.00 100.00 % 0 0 0
app/jobs/enqueue_story_auto_replies_for_all_accounts_job.rb 0.00 % 88 79 0 79 0.00 100.00 % 0 0 0
app/jobs/fetch_instagram_profile_details_job.rb 0.00 % 148 129 0 129 0.00 100.00 % 0 0 0
app/jobs/finalize_post_analysis_pipeline_job.rb 0.00 % 357 308 0 308 0.00 100.00 % 0 0 0
app/jobs/generate_llm_comment_job.rb 0.00 % 204 182 0 182 0.00 100.00 % 0 0 0
app/jobs/generate_profile_post_preview_image_job.rb 0.00 % 36 30 0 30 0.00 100.00 % 0 0 0
app/jobs/generate_story_preview_image_job.rb 0.00 % 28 22 0 22 0.00 100.00 % 0 0 0
app/jobs/post_analysis_pipeline_job.rb 0.00 % 38 34 0 34 0.00 100.00 % 0 0 0
app/jobs/post_instagram_profile_comment_job.rb 0.00 % 51 46 0 46 0.00 100.00 % 0 0 0
app/jobs/process_instagram_account_continuously_job.rb 0.00 % 172 146 0 146 0.00 100.00 % 0 0 0
app/jobs/process_post_face_analysis_job.rb 0.00 % 86 77 0 77 0.00 100.00 % 0 0 0
app/jobs/process_post_metadata_tagging_job.rb 0.00 % 257 231 0 231 0.00 100.00 % 0 0 0
app/jobs/process_post_ocr_analysis_job.rb 0.00 % 208 183 0 183 0.00 100.00 % 0 0 0
app/jobs/process_post_video_analysis_job.rb 0.00 % 261 229 0 229 0.00 100.00 % 0 0 0
app/jobs/process_post_visual_analysis_job.rb 0.00 % 210 186 0 186 0.00 100.00 % 0 0 0
app/jobs/purge_expired_instagram_post_media_job.rb 0.00 % 25 21 0 21 0.00 100.00 % 0 0 0
app/jobs/refresh_account_audit_logs_job.rb 0.00 % 42 35 0 35 0.00 100.00 % 0 0 0
app/jobs/refresh_profile_post_face_identity_job.rb 0.00 % 85 74 0 74 0.00 100.00 % 0 0 0
app/jobs/retry_failed_background_jobs_job.rb 0.00 % 19 16 0 16 0.00 100.00 % 0 0 0
app/jobs/send_instagram_message_job.rb 0.00 % 39 31 0 31 0.00 100.00 % 0 0 0
app/jobs/story_processing_job.rb 0.00 % 8 7 0 7 0.00 100.00 % 0 0 0
app/jobs/sync_all_home_stories_job.rb 0.00 % 64 53 0 53 0.00 100.00 % 0 0 0
app/jobs/sync_follow_graph_job.rb 0.00 % 56 46 0 46 0.00 100.00 % 0 0 0
app/jobs/sync_home_story_carousel_job.rb 0.00 % 48 42 0 42 0.00 100.00 % 0 0 0
app/jobs/sync_instagram_profile_stories_job.rb 0.00 % 1128 983 0 983 0.00 100.00 % 0 0 0
app/jobs/sync_next_profiles_for_account_job.rb 0.00 % 48 43 0 43 0.00 100.00 % 0 0 0
app/jobs/sync_profile_stories_for_account_job.rb 0.00 % 83 76 0 76 0.00 100.00 % 0 0 0
app/jobs/sync_recent_profile_posts_for_profile_job.rb 0.00 % 351 307 0 307 0.00 100.00 % 0 0 0
app/jobs/verify_instagram_messageability_job.rb 0.00 % 63 57 0 57 0.00 100.00 % 0 0 0
app/jobs/workspace_process_actions_todo_post_job.rb 0.00 % 517 450 0 450 0.00 100.00 % 0 0 0

Libraries ( 0.0% covered at 0.0 hits/line )

2 files in total.
276 relevant lines, 0 lines covered and 276 lines missed. ( 0.0% )
0 total branches, 0 branches covered and 0 branches missed. ( 100.0% )
File % covered Lines Relevant Lines Lines covered Lines missed Avg. Hits / Line Branch Coverage Branches Covered branches Missed branches
lib/tasks/story_debug_analyzer.rb 0.00 % 182 130 0 130 0.00 100.00 % 0 0 0
lib/tasks/story_network_analyzer.rb 0.00 % 178 146 0 146 0.00 100.00 % 0 0 0

Ungrouped ( 0.0% covered at 0.0 hits/line )

82 files in total.
19442 relevant lines, 0 lines covered and 19442 lines missed. ( 0.0% )
0 total branches, 0 branches covered and 0 branches missed. ( 100.0% )
File % covered Lines Relevant Lines Lines covered Lines missed Avg. Hits / Line Branch Coverage Branches Covered branches Missed branches
app/services/ai/api_usage_tracker.rb 0.00 % 94 83 0 83 0.00 100.00 % 0 0 0
app/services/ai/comment_relevance_scorer.rb 0.00 % 66 55 0 55 0.00 100.00 % 0 0 0
app/services/ai/insight_sync.rb 0.00 % 224 196 0 196 0.00 100.00 % 0 0 0
app/services/ai/local_engagement_comment_generator.rb 0.00 % 513 462 0 462 0.00 100.00 % 0 0 0
app/services/ai/local_microservice_client.rb 0.00 % 727 597 0 597 0.00 100.00 % 0 0 0
app/services/ai/ollama_client.rb 0.00 % 134 109 0 109 0.00 100.00 % 0 0 0
app/services/ai/post_analysis_context_builder.rb 0.00 % 293 258 0 258 0.00 100.00 % 0 0 0
app/services/ai/post_analysis_pipeline_state.rb 0.00 % 288 239 0 239 0.00 100.00 % 0 0 0
app/services/ai/post_analyzer.rb 0.00 % 89 74 0 74 0.00 100.00 % 0 0 0
app/services/ai/post_comment_generation_service.rb 0.00 % 439 382 0 382 0.00 100.00 % 0 0 0
app/services/ai/post_ocr_service.rb 0.00 % 91 79 0 79 0.00 100.00 % 0 0 0
app/services/ai/profile_analyzer.rb 0.00 % 92 77 0 77 0.00 100.00 % 0 0 0
app/services/ai/profile_auto_tagger.rb 0.00 % 54 45 0 45 0.00 100.00 % 0 0 0
app/services/ai/profile_comment_preparation_service.rb 0.00 % 372 329 0 329 0.00 100.00 % 0 0 0
app/services/ai/profile_demographics_aggregator.rb 0.00 % 213 178 0 178 0.00 100.00 % 0 0 0
app/services/ai/profile_history_build_service.rb 0.00 % 871 782 0 782 0.00 100.00 % 0 0 0
app/services/ai/profile_history_narrative_builder.rb 0.00 % 184 158 0 158 0.00 100.00 % 0 0 0
app/services/ai/provider_registry.rb 0.00 % 57 48 0 48 0.00 100.00 % 0 0 0
app/services/ai/providers/base_provider.rb 0.00 % 69 52 0 52 0.00 100.00 % 0 0 0
app/services/ai/providers/local_provider.rb 0.00 % 784 689 0 689 0.00 100.00 % 0 0 0
app/services/ai/runner.rb 0.00 % 349 289 0 289 0.00 100.00 % 0 0 0
app/services/ai/verified_story_insight_builder.rb 0.00 % 685 610 0 610 0.00 100.00 % 0 0 0
app/services/face_detection_service.rb 0.00 % 368 321 0 321 0.00 100.00 % 0 0 0
app/services/face_embedding_service.rb 0.00 % 93 77 0 77 0.00 100.00 % 0 0 0
app/services/face_identity_resolution_service.rb 0.00 % 629 526 0 526 0.00 100.00 % 0 0 0
app/services/instagram/authentication_required_error.rb 0.00 % 4 4 0 4 0.00 100.00 % 0 0 0
app/services/instagram/avatar_url_normalizer.rb 0.00 % 45 37 0 37 0.00 100.00 % 0 0 0
app/services/instagram/client.rb 0.00 % 7341 6278 0 6278 0.00 100.00 % 0 0 0
app/services/instagram/client/bulk_message_send_service.rb 0.00 % 114 102 0 102 0.00 100.00 % 0 0 0
app/services/instagram/client/profile_analysis_dataset_service.rb 0.00 % 65 57 0 57 0.00 100.00 % 0 0 0
app/services/instagram/client/session_validation_service.rb 0.00 % 138 116 0 116 0.00 100.00 % 0 0 0
app/services/instagram/client/single_message_send_service.rb 0.00 % 97 89 0 89 0.00 100.00 % 0 0 0
app/services/instagram/client/sync_data_service.rb 0.00 % 73 64 0 64 0.00 100.00 % 0 0 0
app/services/instagram/client/sync_follow_graph_service.rb 0.00 % 102 86 0 86 0.00 100.00 % 0 0 0
app/services/instagram/profile_analysis_collector.rb 0.00 % 524 443 0 443 0.00 100.00 % 0 0 0
app/services/instagram/profile_scan_policy.rb 0.00 % 245 209 0 209 0.00 100.00 % 0 0 0
app/services/instagram_accounts/dashboard_snapshot_service.rb 0.00 % 69 62 0 62 0.00 100.00 % 0 0 0
app/services/instagram_accounts/llm_comment_request_service.rb 0.00 % 141 123 0 123 0.00 100.00 % 0 0 0
app/services/instagram_accounts/llm_queue_inspector.rb 0.00 % 67 55 0 55 0.00 100.00 % 0 0 0
app/services/instagram_accounts/skip_diagnostics_service.rb 0.00 % 76 65 0 65 0.00 100.00 % 0 0 0
app/services/instagram_accounts/story_archive_item_serializer.rb 0.00 % 129 112 0 112 0.00 100.00 % 0 0 0
app/services/instagram_accounts/story_archive_query.rb 0.00 % 73 61 0 61 0.00 100.00 % 0 0 0
app/services/instagram_accounts/technical_details_payload_service.rb 0.00 % 73 62 0 62 0.00 100.00 % 0 0 0
app/services/instagram_profiles/events_query.rb 0.00 % 89 72 0 72 0.00 100.00 % 0 0 0
app/services/instagram_profiles/mutual_friends_resolver.rb 0.00 % 47 40 0 40 0.00 100.00 % 0 0 0
app/services/instagram_profiles/profiles_index_query.rb 0.00 % 173 153 0 153 0.00 100.00 % 0 0 0
app/services/instagram_profiles/show_snapshot_service.rb 0.00 % 73 63 0 63 0.00 100.00 % 0 0 0
app/services/instagram_profiles/tabulator_events_payload_builder.rb 0.00 % 71 59 0 59 0.00 100.00 % 0 0 0
app/services/instagram_profiles/tabulator_params.rb 0.00 % 70 59 0 59 0.00 100.00 % 0 0 0
app/services/instagram_profiles/tabulator_profiles_payload_builder.rb 0.00 % 48 43 0 43 0.00 100.00 % 0 0 0
app/services/jobs/context_extractor.rb 0.00 % 92 77 0 77 0.00 100.00 % 0 0 0
app/services/jobs/failure_retry.rb 0.00 % 215 172 0 172 0.00 100.00 % 0 0 0
app/services/messaging/integration_service.rb 0.00 % 45 38 0 38 0.00 100.00 % 0 0 0
app/services/ops/account_issues.rb 0.00 % 44 37 0 37 0.00 100.00 % 0 0 0
app/services/ops/audit_log_builder.rb 0.00 % 64 60 0 60 0.00 100.00 % 0 0 0
app/services/ops/issue_tracker.rb 0.00 % 150 132 0 132 0.00 100.00 % 0 0 0
app/services/ops/live_update_broadcaster.rb 0.00 % 56 46 0 46 0.00 100.00 % 0 0 0
app/services/ops/local_ai_health.rb 0.00 % 130 110 0 110 0.00 100.00 % 0 0 0
app/services/ops/local_story_intelligence_backfill.rb 0.00 % 188 160 0 160 0.00 100.00 % 0 0 0
app/services/ops/metrics.rb 0.00 % 179 164 0 164 0.00 100.00 % 0 0 0
app/services/ops/queue_health.rb 0.00 % 49 43 0 43 0.00 100.00 % 0 0 0
app/services/ops/resource_guard.rb 0.00 % 130 111 0 111 0.00 100.00 % 0 0 0
app/services/ops/structured_logger.rb 0.00 % 41 34 0 34 0.00 100.00 % 0 0 0
app/services/person_identity_feedback_service.rb 0.00 % 353 298 0 298 0.00 100.00 % 0 0 0
app/services/personalization_engine.rb 0.00 % 47 39 0 39 0.00 100.00 % 0 0 0
app/services/pipeline/account_processing_coordinator.rb 0.00 % 244 211 0 211 0.00 100.00 % 0 0 0
app/services/post_face_recognition_service.rb 0.00 % 389 356 0 356 0.00 100.00 % 0 0 0
app/services/post_video_context_extraction_service.rb 0.00 % 391 356 0 356 0.00 100.00 % 0 0 0
app/services/response_generation_service.rb 0.00 % 58 49 0 49 0.00 100.00 % 0 0 0
app/services/speech_transcription_service.rb 0.00 % 136 112 0 112 0.00 100.00 % 0 0 0
app/services/story_archive/media_preview_resolver.rb 0.00 % 45 37 0 37 0.00 100.00 % 0 0 0
app/services/story_content_understanding_service.rb 0.00 % 67 57 0 57 0.00 100.00 % 0 0 0
app/services/story_ingestion_service.rb 0.00 % 96 83 0 83 0.00 100.00 % 0 0 0
app/services/story_processing_service.rb 0.00 % 486 433 0 433 0.00 100.00 % 0 0 0
app/services/user_profile_builder_service.rb 0.00 % 101 87 0 87 0.00 100.00 % 0 0 0
app/services/vector_matching_service.rb 0.00 % 210 176 0 176 0.00 100.00 % 0 0 0
app/services/video_audio_extraction_service.rb 0.00 % 74 62 0 62 0.00 100.00 % 0 0 0
app/services/video_frame_change_detector_service.rb 0.00 % 230 203 0 203 0.00 100.00 % 0 0 0
app/services/video_frame_extraction_service.rb 0.00 % 88 74 0 74 0.00 100.00 % 0 0 0
app/services/video_metadata_service.rb 0.00 % 95 82 0 82 0.00 100.00 % 0 0 0
app/services/video_thumbnail_service.rb 0.00 % 103 87 0 87 0.00 100.00 % 0 0 0
app/services/workspace/actions_todo_queue_service.rb 0.00 % 266 227 0 227 0.00 100.00 % 0 0 0

app/channels/application_cable/channel.rb

0.0% lines covered

100.0% branches covered

4 relevant lines. 0 lines covered and 4 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. module ApplicationCable
  2. class Channel < ActionCable::Channel::Base
  3. end
  4. end

app/channels/application_cable/connection.rb

0.0% lines covered

100.0% branches covered

19 relevant lines. 0 lines covered and 19 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. module ApplicationCable
  2. class Connection < ActionCable::Connection::Base
  3. identified_by :connection_id, :current_account_id
  4. def connect
  5. self.connection_id = SecureRandom.hex(8)
  6. self.current_account_id = resolve_current_account_id
  7. rescue StandardError
  8. self.current_account_id = nil
  9. end
  10. private
  11. def resolve_current_account_id
  12. selected_id = request.session[:instagram_account_id].to_i
  13. if selected_id.positive? && InstagramAccount.exists?(id: selected_id)
  14. return selected_id
  15. end
  16. InstagramAccount.order(:id).limit(1).pick(:id)
  17. end
  18. end
  19. end

app/channels/llm_comment_generation_channel.rb

0.0% lines covered

100.0% branches covered

12 relevant lines. 0 lines covered and 12 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. class LlmCommentGenerationChannel < ApplicationCable::Channel
  2. def subscribed
  3. requested_account_id = params[:account_id].to_i
  4. if requested_account_id <= 0
  5. reject
  6. return
  7. end
  8. stream_from "llm_comment_generation_#{requested_account_id}"
  9. end
  10. def unsubscribed
  11. # Any cleanup needed when channel is unsubscribed
  12. end
  13. end

app/channels/operations_channel.rb

0.0% lines covered

100.0% branches covered

15 relevant lines. 0 lines covered and 15 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. class OperationsChannel < ApplicationCable::Channel
  2. def subscribed
  3. requested_account_id = params[:account_id].to_i
  4. connection_account_id = current_account_id.to_i
  5. account_id = requested_account_id.positive? ? requested_account_id : connection_account_id
  6. stream_from Ops::LiveUpdateBroadcaster.account_stream(account_id) if account_id.positive?
  7. include_global = truthy?(params[:include_global]) || account_id <= 0
  8. stream_from Ops::LiveUpdateBroadcaster.global_stream if include_global
  9. end
  10. private
  11. def truthy?(raw)
  12. value = raw.to_s.strip.downcase
  13. %w[1 true yes on].include?(value)
  14. end
  15. end

app/controllers/admin/background_jobs_controller.rb

0.0% lines covered

100.0% branches covered

772 relevant lines. 0 lines covered and 772 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. class Admin::BackgroundJobsController < Admin::BaseController
  2. def dashboard
  3. @backend = queue_backend
  4. if @backend == "sidekiq"
  5. load_sidekiq_dashboard!
  6. else
  7. load_solid_queue_dashboard!
  8. end
  9. attach_recent_job_details!
  10. @failure_logs = BackgroundJobFailure.recent_first.limit(100)
  11. @recent_issues = AppIssue.recent_first.limit(15)
  12. @recent_storage_ingestions = ActiveStorageIngestion.recent_first.limit(15)
  13. end
  14. def failures
  15. scope = BackgroundJobFailure.order(occurred_at: :desc, id: :desc)
  16. scope = apply_tabulator_filters(scope)
  17. @q = params[:q].to_s.strip
  18. if @q.present?
  19. term = "%#{@q.downcase}%"
  20. scope = scope.where(
  21. "LOWER(job_class) LIKE ? OR LOWER(COALESCE(queue_name, '')) LIKE ? OR LOWER(error_class) LIKE ? OR LOWER(error_message) LIKE ?",
  22. term, term, term, term
  23. )
  24. end
  25. scope = apply_remote_sort(scope) || scope
  26. page = params.fetch(:page, 1).to_i
  27. page = 1 if page < 1
  28. per_page_param = params[:per_page].presence || params[:size].presence
  29. per_page = per_page_param.to_i
  30. per_page = 50 if per_page <= 0
  31. per_page = per_page.clamp(10, 200)
  32. total = scope.count
  33. pages = (total / per_page.to_f).ceil
  34. @failures = scope.offset((page - 1) * per_page).limit(per_page)
  35. respond_to do |format|
  36. format.html
  37. format.json do
  38. render json: tabulator_payload(failures: @failures, total: total, pages: pages)
  39. end
  40. end
  41. end
  42. def failure
  43. @failure = BackgroundJobFailure.find(params[:id])
  44. end
  45. def retry_failure
  46. failure = BackgroundJobFailure.find(params[:id])
  47. Jobs::FailureRetry.enqueue!(failure)
  48. Ops::LiveUpdateBroadcaster.broadcast!(
  49. topic: "job_failures_changed",
  50. account_id: failure.instagram_account_id,
  51. payload: { action: "retry", failure_id: failure.id },
  52. throttle_key: "job_failures_changed",
  53. throttle_seconds: 0
  54. )
  55. respond_to do |format|
  56. format.html { redirect_to admin_background_job_failure_path(failure), notice: "Retry queued for #{failure.job_class}." }
  57. format.json { render json: { ok: true } }
  58. end
  59. rescue Jobs::FailureRetry::RetryError => e
  60. respond_to do |format|
  61. format.html { redirect_to admin_background_job_failure_path(params[:id]), alert: e.message }
  62. format.json { render json: { ok: false, error: e.message }, status: :unprocessable_entity }
  63. end
  64. end
  65. def clear_all_jobs
  66. backend = queue_backend
  67. if backend == "sidekiq"
  68. clear_sidekiq_jobs!
  69. else
  70. clear_solid_queue_jobs!
  71. end
  72. Ops::LiveUpdateBroadcaster.broadcast!(
  73. topic: "jobs_changed",
  74. payload: { action: "clear_all" },
  75. throttle_key: "jobs_changed",
  76. throttle_seconds: 0
  77. )
  78. redirect_to admin_background_jobs_path, notice: "All jobs have been stopped and queue cleared successfully."
  79. rescue StandardError => e
  80. redirect_to admin_background_jobs_path, alert: "Failed to clear jobs: #{e.message}"
  81. end
  82. private
  83. def queue_backend
  84. Rails.application.config.active_job.queue_adapter.to_s
  85. rescue StandardError
  86. "unknown"
  87. end
  88. def load_solid_queue_dashboard!
  89. @counts = {
  90. ready: safe_count { SolidQueue::ReadyExecution.count },
  91. scheduled: safe_count { SolidQueue::ScheduledExecution.count },
  92. claimed: safe_count { SolidQueue::ClaimedExecution.count },
  93. blocked: safe_count { SolidQueue::BlockedExecution.count },
  94. failed: safe_count { SolidQueue::FailedExecution.count },
  95. pauses: safe_count { SolidQueue::Pause.count },
  96. jobs_total: safe_count { SolidQueue::Job.count }
  97. }
  98. @processes = safe_query { SolidQueue::Process.order(last_heartbeat_at: :desc).limit(50).to_a } || []
  99. solid_jobs = safe_query { SolidQueue::Job.order(created_at: :desc).limit(100).to_a } || []
  100. @recent_jobs = solid_jobs.map { |job| serialize_solid_queue_job(job) }
  101. @recent_failed = safe_query do
  102. SolidQueue::FailedExecution
  103. .includes(:job)
  104. .order(created_at: :desc)
  105. .limit(50)
  106. .to_a
  107. end || []
  108. end
  109. def load_sidekiq_dashboard!
  110. require "sidekiq/api"
  111. queues = safe_query { Sidekiq::Queue.all } || []
  112. scheduled = Sidekiq::ScheduledSet.new
  113. retries = Sidekiq::RetrySet.new
  114. dead = Sidekiq::DeadSet.new
  115. processes = Sidekiq::ProcessSet.new
  116. queue_rows = queues.map { |queue| { name: queue.name, size: queue.size } }
  117. @counts = {
  118. enqueued: queue_rows.sum { |row| row[:size].to_i },
  119. scheduled: safe_count { scheduled.size },
  120. retries: safe_count { retries.size },
  121. dead: safe_count { dead.size },
  122. processes: safe_count { processes.size },
  123. queues: queue_rows
  124. }
  125. @processes = safe_query do
  126. processes.map do |p|
  127. {
  128. identity: p["identity"],
  129. hostname: p["hostname"],
  130. pid: p["pid"],
  131. queues: Array(p["queues"]),
  132. labels: Array(p["labels"]),
  133. busy: p["busy"].to_i,
  134. beat: parse_time(p["beat"])
  135. }
  136. end.sort_by { |row| row[:beat] || Time.at(0) }.reverse.first(50)
  137. end || []
  138. enqueued_rows = queues.flat_map do |queue|
  139. queue.first(30).map { |job| serialize_sidekiq_job(job: job, status: "enqueued", queue_name: queue.name) }
  140. end
  141. scheduled_rows = scheduled.first(30).map { |job| serialize_sidekiq_job(job: job, status: "scheduled", queue_name: job.queue) }
  142. retry_rows = retries.first(20).map { |job| serialize_sidekiq_job(job: job, status: "retry", queue_name: job.queue) }
  143. dead_rows = dead.first(20).map { |job| serialize_sidekiq_job(job: job, status: "dead", queue_name: job.queue) }
  144. @recent_jobs = (enqueued_rows + scheduled_rows + retry_rows + dead_rows)
  145. .sort_by { |row| row[:created_at] || Time.at(0) }
  146. .reverse
  147. .first(100)
  148. @recent_failed = (retry_rows + dead_rows).first(50)
  149. rescue StandardError
  150. @counts = { enqueued: 0, scheduled: 0, retries: 0, dead: 0, processes: 0, queues: [] }
  151. @processes = []
  152. @recent_jobs = []
  153. @recent_failed = []
  154. end
  155. def serialize_sidekiq_job(job:, status:, queue_name:)
  156. item = job.item.to_h
  157. wrapper = active_job_wrapper_from_sidekiq(item)
  158. context = Jobs::ContextExtractor.from_active_job_arguments(wrapper["arguments"] || item["args"])
  159. {
  160. created_at: parse_time(item["created_at"] || item["enqueued_at"] || item["at"]),
  161. class_name: wrapper["job_class"].presence || item["wrapped"].presence || item["class"].to_s,
  162. queue_name: queue_name.to_s,
  163. status: status,
  164. jid: item["jid"].to_s,
  165. active_job_id: wrapper["job_id"].to_s.presence,
  166. provider_job_id: wrapper["provider_job_id"].to_s.presence || item["jid"].to_s.presence,
  167. error_message: item["error_message"].to_s.presence,
  168. job_scope: context[:job_scope],
  169. context_label: context[:context_label],
  170. instagram_account_id: context[:instagram_account_id],
  171. instagram_profile_id: context[:instagram_profile_id],
  172. arguments: wrapper["arguments"] || item["args"] || []
  173. }
  174. rescue StandardError
  175. {
  176. created_at: nil,
  177. class_name: "unknown",
  178. queue_name: queue_name.to_s,
  179. status: status,
  180. jid: nil,
  181. active_job_id: nil,
  182. provider_job_id: nil,
  183. error_message: nil,
  184. job_scope: "system",
  185. context_label: "System",
  186. instagram_account_id: nil,
  187. instagram_profile_id: nil,
  188. arguments: []
  189. }
  190. end
  191. def serialize_solid_queue_job(job)
  192. args = job.respond_to?(:arguments) ? job.arguments : {}
  193. context = Jobs::ContextExtractor.from_solid_queue_job_arguments(args)
  194. status =
  195. if job.respond_to?(:finished_at) && job.finished_at.present?
  196. "finished"
  197. elsif job.respond_to?(:scheduled_at) && job.scheduled_at.present?
  198. "scheduled"
  199. else
  200. "running/queued"
  201. end
  202. {
  203. created_at: (job.created_at if job.respond_to?(:created_at)),
  204. class_name: (job.class_name if job.respond_to?(:class_name)) || "unknown",
  205. queue_name: (job.queue_name if job.respond_to?(:queue_name)).to_s,
  206. status: status,
  207. jid: (job.id.to_s if job.respond_to?(:id)),
  208. active_job_id: (job.active_job_id.to_s if job.respond_to?(:active_job_id)).presence,
  209. provider_job_id: nil,
  210. error_message: nil,
  211. job_scope: context[:job_scope],
  212. context_label: context[:context_label],
  213. instagram_account_id: context[:instagram_account_id],
  214. instagram_profile_id: context[:instagram_profile_id],
  215. arguments: args || []
  216. }
  217. rescue StandardError
  218. {
  219. created_at: nil,
  220. class_name: "unknown",
  221. queue_name: "",
  222. status: "unknown",
  223. jid: nil,
  224. active_job_id: nil,
  225. provider_job_id: nil,
  226. error_message: nil,
  227. job_scope: "system",
  228. context_label: "System",
  229. instagram_account_id: nil,
  230. instagram_profile_id: nil,
  231. arguments: []
  232. }
  233. end
  234. def attach_recent_job_details!
  235. rows = Array(@recent_jobs)
  236. return if rows.empty?
  237. active_job_ids = rows.map { |row| row[:active_job_id].to_s.presence }.compact.uniq
  238. action_logs_by_job_id = load_action_logs_by_job_id(active_job_ids: active_job_ids)
  239. failures_by_job_id = load_failures_by_job_id(active_job_ids: active_job_ids)
  240. ingestions_by_job_id = load_ingestions_by_job_id(active_job_ids: active_job_ids)
  241. llm_events_by_job_id = load_llm_events_by_job_id(active_job_ids: active_job_ids)
  242. api_calls_by_job_id = load_api_calls_by_job_id(active_job_ids: active_job_ids)
  243. rows.each do |row|
  244. active_job_id = row[:active_job_id].to_s
  245. action_log = action_logs_by_job_id[active_job_id]&.first
  246. failure = failures_by_job_id[active_job_id]&.first
  247. direct_ingestions = ingestions_by_job_id[active_job_id] || []
  248. direct_llm_events = llm_events_by_job_id[active_job_id] || []
  249. direct_api_calls = api_calls_by_job_id[active_job_id] || []
  250. row[:details] = build_job_details(
  251. row: row,
  252. action_log: action_log,
  253. failure: failure,
  254. direct_ingestions: direct_ingestions,
  255. direct_llm_events: direct_llm_events,
  256. direct_api_calls: direct_api_calls
  257. )
  258. end
  259. rescue StandardError
  260. rows.each { |row| row[:details] = fallback_job_details(row: row) }
  261. end
  262. def load_action_logs_by_job_id(active_job_ids:)
  263. return {} if active_job_ids.empty?
  264. InstagramProfileActionLog
  265. .includes(:instagram_account, :instagram_profile)
  266. .where(active_job_id: active_job_ids)
  267. .order(created_at: :desc)
  268. .to_a
  269. .group_by { |row| row.active_job_id.to_s }
  270. rescue StandardError
  271. {}
  272. end
  273. def load_failures_by_job_id(active_job_ids:)
  274. return {} if active_job_ids.empty?
  275. BackgroundJobFailure
  276. .where(active_job_id: active_job_ids)
  277. .order(occurred_at: :desc, id: :desc)
  278. .to_a
  279. .group_by { |row| row.active_job_id.to_s }
  280. rescue StandardError
  281. {}
  282. end
  283. def load_ingestions_by_job_id(active_job_ids:)
  284. return {} if active_job_ids.empty?
  285. ActiveStorageIngestion
  286. .where(created_by_active_job_id: active_job_ids)
  287. .order(created_at: :desc, id: :desc)
  288. .limit(400)
  289. .to_a
  290. .group_by { |row| row.created_by_active_job_id.to_s }
  291. rescue StandardError
  292. {}
  293. end
  294. def load_llm_events_by_job_id(active_job_ids:)
  295. return {} if active_job_ids.empty?
  296. InstagramProfileEvent
  297. .where(llm_comment_job_id: active_job_ids)
  298. .order(updated_at: :desc, id: :desc)
  299. .limit(300)
  300. .to_a
  301. .group_by { |row| row.llm_comment_job_id.to_s }
  302. rescue StandardError
  303. {}
  304. end
  305. def load_api_calls_by_job_id(active_job_ids:)
  306. return {} if active_job_ids.empty?
  307. index = Hash.new { |h, k| h[k] = [] }
  308. AiApiCall.recent_first.limit(600).to_a.each do |call|
  309. metadata = call.metadata.is_a?(Hash) ? call.metadata : {}
  310. active_job_id = metadata["active_job_id"].to_s
  311. next if active_job_id.blank? || !active_job_ids.include?(active_job_id)
  312. index[active_job_id] << call
  313. end
  314. index
  315. rescue StandardError
  316. {}
  317. end
  318. def build_job_details(row:, action_log:, failure:, direct_ingestions:, direct_llm_events:, direct_api_calls:)
  319. window = inferred_time_window(row: row, action_log: action_log, failure: failure)
  320. api_calls = direct_api_calls.presence || fallback_api_calls(row: row, window: window)
  321. ingestions = direct_ingestions.presence || fallback_ingestions(row: row, window: window)
  322. llm_events = direct_llm_events.presence || fallback_llm_events(row: row, window: window)
  323. ai_analyses = related_ai_analyses(row: row, action_log: action_log, window: window)
  324. story_rows = related_story_rows(row: row, window: window)
  325. processing_steps = build_processing_steps(
  326. row: row,
  327. action_log: action_log,
  328. failure: failure,
  329. api_calls: api_calls,
  330. ingestions: ingestions,
  331. llm_events: llm_events,
  332. ai_analyses: ai_analyses,
  333. story_rows: story_rows
  334. )
  335. final_output = build_final_output(row: row, action_log: action_log, failure: failure)
  336. technical_data = build_technical_data(action_log: action_log, llm_events: llm_events, ai_analyses: ai_analyses, story_rows: story_rows)
  337. {
  338. processing_steps: processing_steps,
  339. final_output: final_output,
  340. api_responses: api_calls.first(8).map { |call| serialize_api_call(call) },
  341. technical_data: technical_data,
  342. blobs: ingestions.first(10).map { |row_item| serialize_ingestion(row_item) }
  343. }
  344. rescue StandardError
  345. fallback_job_details(row: row)
  346. end
  347. def fallback_job_details(row:)
  348. {
  349. processing_steps: [ "No detailed processing records were linked to this job yet." ],
  350. final_output: {
  351. status: row[:status].to_s,
  352. summary: row[:error_message].to_s.presence || "No final output captured yet."
  353. }.compact,
  354. api_responses: [],
  355. technical_data: [],
  356. blobs: []
  357. }
  358. end
  359. def inferred_time_window(row:, action_log:, failure:)
  360. started_candidates = [
  361. action_log&.started_at,
  362. action_log&.occurred_at,
  363. row[:created_at],
  364. failure&.occurred_at
  365. ].compact
  366. ended_candidates = [
  367. action_log&.finished_at,
  368. failure&.occurred_at,
  369. row[:created_at]
  370. ].compact
  371. return nil if started_candidates.empty? && ended_candidates.empty?
  372. started_at = (started_candidates.min || ended_candidates.min) - 20.minutes
  373. ended_at = (ended_candidates.max || started_at + 2.hours) + 20.minutes
  374. started_at..ended_at
  375. rescue StandardError
  376. nil
  377. end
  378. def fallback_api_calls(row:, window:)
  379. account_id = row[:instagram_account_id].to_i
  380. return [] unless account_id.positive?
  381. scope = AiApiCall.where(instagram_account_id: account_id).order(occurred_at: :desc, id: :desc)
  382. scope = scope.where(occurred_at: window) if window
  383. scope.limit(8).to_a
  384. rescue StandardError
  385. []
  386. end
  387. def fallback_ingestions(row:, window:)
  388. scope = ActiveStorageIngestion.order(created_at: :desc, id: :desc)
  389. profile_id = row[:instagram_profile_id].to_i
  390. account_id = row[:instagram_account_id].to_i
  391. return [] unless profile_id.positive? || account_id.positive?
  392. scope = scope.where(instagram_profile_id: profile_id) if profile_id.positive?
  393. scope = scope.where(instagram_account_id: account_id) if !profile_id.positive? && account_id.positive?
  394. scope = scope.where(created_at: window) if window
  395. scope.limit(10).to_a
  396. rescue StandardError
  397. []
  398. end
  399. def fallback_llm_events(row:, window:)
  400. profile_id = row[:instagram_profile_id].to_i
  401. return [] unless profile_id.positive?
  402. scope = InstagramProfileEvent.where(instagram_profile_id: profile_id).order(updated_at: :desc, id: :desc)
  403. scope = scope.where(updated_at: window) if window
  404. scope.limit(6).to_a.select do |event|
  405. llm_meta = event.llm_comment_metadata.is_a?(Hash) ? event.llm_comment_metadata : {}
  406. raw_meta = event.metadata.is_a?(Hash) ? event.metadata : {}
  407. llm_meta.present? || raw_meta["processing_metadata"].is_a?(Hash) || raw_meta["local_story_intelligence"].is_a?(Hash)
  408. end
  409. rescue StandardError
  410. []
  411. end
  412. def related_ai_analyses(row:, action_log:, window:)
  413. account_id = row[:instagram_account_id].to_i
  414. return [] unless account_id.positive?
  415. scope = AiAnalysis.where(instagram_account_id: account_id).order(created_at: :desc, id: :desc)
  416. scope = scope.where(created_at: window) if window
  417. profile_id = row[:instagram_profile_id].to_i
  418. if profile_id.positive?
  419. scope = scope.where(analyzable_type: "InstagramProfile", analyzable_id: profile_id)
  420. end
  421. purpose_hint = purpose_hint_for(row: row, action_log: action_log)
  422. scope = scope.where(purpose: purpose_hint) if purpose_hint.present?
  423. scope.limit(6).to_a
  424. rescue StandardError
  425. []
  426. end
  427. def purpose_hint_for(row:, action_log:)
  428. klass = row[:class_name].to_s
  429. action = action_log&.action.to_s
  430. return "post" if klass.include?("AnalyzeInstagramPostJob") || action == "capture_profile_posts" || action == "analyze_profile_posts"
  431. return "profile" if klass.include?("AnalyzeInstagramProfileJob") || action == "analyze_profile"
  432. nil
  433. end
  434. def related_story_rows(row:, window:)
  435. profile_id = row[:instagram_profile_id].to_i
  436. return [] unless profile_id.positive?
  437. scope = InstagramStory.where(instagram_profile_id: profile_id).order(updated_at: :desc, id: :desc)
  438. scope = scope.where(updated_at: window) if window
  439. scope.limit(6).to_a.select do |story|
  440. metadata = story.metadata.is_a?(Hash) ? story.metadata : {}
  441. metadata["processing_metadata"].is_a?(Hash) ||
  442. metadata["generated_response_suggestions"].present? ||
  443. metadata["content_understanding"].is_a?(Hash)
  444. end
  445. rescue StandardError
  446. []
  447. end
  448. def build_processing_steps(row:, action_log:, failure:, api_calls:, ingestions:, llm_events:, ai_analyses:, story_rows:)
  449. steps = []
  450. if row[:created_at].present?
  451. steps << "Queued in #{row[:queue_name].to_s.presence || '-'} at #{row[:created_at].iso8601}."
  452. else
  453. steps << "Queued in #{row[:queue_name].to_s.presence || '-'}."
  454. end
  455. if action_log
  456. steps << "Action log '#{action_log.action}' recorded with status '#{action_log.status}'."
  457. steps << "Execution started at #{action_log.started_at.iso8601}." if action_log.started_at.present?
  458. steps << "Execution finished at #{action_log.finished_at.iso8601}." if action_log.finished_at.present?
  459. end
  460. steps << "Captured #{api_calls.length} related API call(s)." if api_calls.any?
  461. steps << "Generated #{ai_analyses.length} AI analysis record(s)." if ai_analyses.any?
  462. steps << "Updated #{llm_events.length} LLM/story event record(s)." if llm_events.any?
  463. steps << "Persisted #{story_rows.length} story processing artifact(s)." if story_rows.any?
  464. steps << "Stored #{ingestions.length} blob/file ingestion record(s)." if ingestions.any?
  465. if failure
  466. steps << "Failed at #{failure.occurred_at&.iso8601 || 'unknown time'} with #{failure.error_class}: #{failure.error_message.to_s.byteslice(0, 240)}"
  467. end
  468. steps.uniq.first(12)
  469. end
  470. def build_final_output(row:, action_log:, failure:)
  471. {
  472. status: action_log&.status.to_s.presence || (failure.present? ? "failed" : row[:status].to_s),
  473. summary: action_log&.log_text.to_s.presence || failure&.error_message.to_s.presence || row[:error_message].to_s.presence || "No final output captured yet.",
  474. error_class: failure&.error_class.to_s.presence,
  475. error_message: action_log&.error_message.to_s.presence || failure&.error_message.to_s.presence || row[:error_message].to_s.presence,
  476. metadata: compact_data(action_log&.metadata)
  477. }.compact
  478. end
  479. def build_technical_data(action_log:, llm_events:, ai_analyses:, story_rows:)
  480. rows = []
  481. rows << {
  482. source: "profile_action_log",
  483. payload: compact_data(action_log.metadata)
  484. } if action_log&.metadata.is_a?(Hash)
  485. llm_events.first(4).each do |event|
  486. rows << {
  487. source: "instagram_profile_event",
  488. payload: {
  489. event_id: event.id,
  490. event_kind: event.kind,
  491. llm_comment_status: event.llm_comment_status,
  492. llm_comment_model: event.llm_comment_model,
  493. llm_comment_provider: event.llm_comment_provider,
  494. generated_comment: event.llm_generated_comment.to_s.presence&.byteslice(0, 280),
  495. relevance_score: event.llm_comment_relevance_score,
  496. llm_comment_metadata: compact_data(event.llm_comment_metadata),
  497. metadata: compact_data(event.metadata)
  498. }.compact
  499. }
  500. end
  501. ai_analyses.first(4).each do |analysis|
  502. rows << {
  503. source: "ai_analysis",
  504. payload: {
  505. analysis_id: analysis.id,
  506. purpose: analysis.purpose,
  507. provider: analysis.provider,
  508. model: analysis.model,
  509. status: analysis.status,
  510. started_at: analysis.started_at&.iso8601,
  511. finished_at: analysis.finished_at&.iso8601,
  512. response_excerpt: analysis.response_text.to_s.presence&.byteslice(0, 320),
  513. analysis: compact_data(analysis.analysis),
  514. metadata: compact_data(analysis.metadata)
  515. }.compact
  516. }
  517. end
  518. story_rows.first(4).each do |story|
  519. metadata = story.metadata.is_a?(Hash) ? story.metadata : {}
  520. rows << {
  521. source: "instagram_story",
  522. payload: {
  523. story_id: story.story_id,
  524. media_type: story.media_type,
  525. processing_status: story.processing_status,
  526. processed: story.processed,
  527. processed_at: story.processed_at&.iso8601,
  528. metadata: compact_data(
  529. metadata.slice(
  530. "processing_metadata",
  531. "generated_response_suggestions",
  532. "content_understanding",
  533. "face_count",
  534. "content_signals",
  535. "ocr_text",
  536. "transcript",
  537. "object_detections",
  538. "scenes"
  539. )
  540. )
  541. }.compact
  542. }
  543. end
  544. rows.first(12)
  545. end
  546. def serialize_api_call(call)
  547. metadata = call.metadata.is_a?(Hash) ? call.metadata : {}
  548. {
  549. occurred_at: call.occurred_at&.iso8601,
  550. provider: call.provider,
  551. operation: call.operation,
  552. category: call.category,
  553. status: call.status,
  554. http_status: call.http_status,
  555. latency_ms: call.latency_ms,
  556. input_tokens: call.input_tokens,
  557. output_tokens: call.output_tokens,
  558. total_tokens: call.total_tokens,
  559. error_message: call.error_message.to_s.presence,
  560. metadata: compact_data(metadata)
  561. }.compact
  562. end
  563. def serialize_ingestion(row)
  564. {
  565. created_at: row.created_at&.iso8601,
  566. attachment_name: row.attachment_name,
  567. record_type: row.record_type,
  568. record_id: row.record_id,
  569. blob_filename: row.blob_filename,
  570. blob_content_type: row.blob_content_type,
  571. blob_byte_size: row.blob_byte_size,
  572. metadata: compact_data(row.metadata)
  573. }.compact
  574. end
  575. def compact_data(value, depth: 0, max_depth: 3)
  576. return nil if value.nil?
  577. return "[depth_limit]" if depth >= max_depth
  578. case value
  579. when Hash
  580. compacted = {}
  581. value.to_h.each do |key, item|
  582. normalized = compact_data(item, depth: depth + 1, max_depth: max_depth)
  583. next if normalized.blank? && normalized != false && normalized != 0
  584. compacted[key.to_s] = normalized
  585. break if compacted.length >= 20
  586. end
  587. compacted
  588. when Array
  589. value.first(10).map { |item| compact_data(item, depth: depth + 1, max_depth: max_depth) }.compact
  590. when String
  591. text = value.to_s.strip
  592. return nil if text.blank?
  593. text.byteslice(0, 320)
  594. when Time, Date, DateTime
  595. value.iso8601
  596. else
  597. value
  598. end
  599. rescue StandardError
  600. value.to_s.byteslice(0, 320)
  601. end
  # Extracts the ActiveJob wrapper payload from a Sidekiq job item.
  #
  # The wrapper is taken to be the first element of item["args"], and it is
  # only accepted when its hash form carries a non-blank "job_class".
  #
  # @param item [#[]] Sidekiq job payload
  # @return [Hash] the wrapper hash, or {} when absent or on any error
  def active_job_wrapper_from_sidekiq(item)
    candidate = Array(item["args"]).first

    if candidate.respond_to?(:to_h)
      wrapper = candidate.to_h
      return wrapper if wrapper["job_class"].present?
    end

    {}
  rescue StandardError
    {}
  end
  # Converts an epoch-seconds value (numeric or numeric string) into a Time.
  #
  # @param value [#to_f, nil] seconds since the epoch
  # @return [Time, nil] nil for blank input or when conversion raises
  def parse_time(value)
    value.blank? ? nil : Time.at(value.to_f)
  rescue StandardError
    nil
  end
  # Yields to the block and returns its value; any StandardError raised by the
  # block is swallowed and 0 is returned instead.
  #
  # @yieldreturn [Object] normally a count
  # @return [Object, Integer]
  def safe_count
    begin
      yield
    rescue StandardError
      0
    end
  end
  # Yields to the block and returns its value; any StandardError raised by the
  # block is swallowed and nil is returned instead.
  #
  # @yieldreturn [Object]
  # @return [Object, nil]
  def safe_query
    begin
      yield
    rescue StandardError
      nil
    end
  end
  # Narrows +scope+ by the column filters sent with the request.
  #
  # Supported fields: job_class, queue_name and error_message (case-insensitive
  # substring match), failure_kind (exact match) and retryable (boolean).
  # Filters on any other field are ignored.
  #
  # Filter values come from request data and are not guaranteed to be Strings
  # (a JSON-encoded filter list can carry numbers or booleans), so values are
  # stringified before being lower-cased, and a literal +false+ is not treated
  # as "no value".
  #
  # @param scope [#where] relation to filter
  # @return [#where] the filtered relation
  def apply_tabulator_filters(scope)
    extract_tabulator_filters.reduce(scope) do |filtered, entry|
      value = entry[:value]
      # false.blank? is true, but false is a meaningful value for "retryable".
      next filtered if value.blank? && value != false

      case entry[:field]
      when "job_class"
        filtered.where("LOWER(job_class) LIKE ?", "%#{value.to_s.downcase}%")
      when "queue_name"
        filtered.where("LOWER(COALESCE(queue_name,'')) LIKE ?", "%#{value.to_s.downcase}%")
      when "error_message"
        filtered.where("LOWER(COALESCE(error_message,'')) LIKE ?", "%#{value.to_s.downcase}%")
      when "failure_kind"
        filtered.where(failure_kind: value.to_s)
      when "retryable"
        filtered.where(retryable: ActiveModel::Type::Boolean.new.cast(value))
      else
        filtered
      end
    end
  end
  # Normalizes the filter list sent with the request into an array of
  # { field:, value: } hashes.
  #
  # The list is read from params[:filters], falling back to params[:filter],
  # and may arrive as a JSON string, an Array, or an indexed parameter hash
  # (whose values are used). Entries without a "field" are skipped.
  #
  # Entries that are ActionController::Parameters are converted with
  # to_unsafe_h: calling to_h on unpermitted parameters raises, which the
  # rescue below would turn into "no filters at all".
  #
  # @return [Array<Hash{Symbol => Object}>] empty when nothing usable was sent
  #   or when parsing fails
  def extract_tabulator_filters
    raw = params[:filters].presence || params[:filter]
    return [] unless raw.present?

    entries =
      case raw
      when String
        JSON.parse(raw)
      when Array
        raw
      when ActionController::Parameters
        raw.to_unsafe_h.values
      else
        []
      end

    Array(entries).filter_map do |item|
      attrs =
        if item.respond_to?(:to_unsafe_h)
          item.to_unsafe_h
        elsif item.respond_to?(:to_h)
          item.to_h
        else
          {}
        end

      field = attrs["field"].to_s
      next if field.blank?

      { field: field, value: attrs["value"] }
    end
  rescue StandardError
    []
  end
  # Applies the first sorter sent with the request to +scope+.
  #
  # Only occurred_at, job_class, queue_name, error_class and failure_kind are
  # sortable; the direction is reduced to the literals "ASC"/"DESC" before it
  # is interpolated, so no request text reaches the SQL fragment.
  #
  # The ordering is applied with +reorder+ rather than +order+: +order+ would
  # append to whatever ordering the scope already carries, which would demote
  # the requested sort to a secondary key. Each clause below already contains
  # its own tie-breakers.
  #
  # @param scope [#reorder] relation to sort
  # @return [Object, nil] the sorted relation, or nil when no supported sorter
  #   was supplied (callers are expected to fall back to the original scope)
  def apply_remote_sort(scope)
    sorters = extract_tabulator_sorters
    return nil unless sorters.is_a?(Array)

    primary = sorters.first
    return nil unless primary.respond_to?(:[])

    column = primary["field"].to_s
    dir = primary["dir"].to_s.downcase == "desc" ? "DESC" : "ASC"

    order_clause = {
      "occurred_at" => "occurred_at #{dir}, id #{dir}",
      "job_class" => "job_class #{dir}, occurred_at DESC, id DESC",
      "queue_name" => "queue_name #{dir} NULLS LAST, occurred_at DESC, id DESC",
      "error_class" => "error_class #{dir}, occurred_at DESC, id DESC",
      "failure_kind" => "failure_kind #{dir}, occurred_at DESC, id DESC"
    }[column]
    return nil if order_clause.nil?

    scope.reorder(Arel.sql(order_clause))
  end
  # Builds the JSON body for the failures table: one row hash per failure plus
  # the paging totals under :last_page and :last_row.
  #
  # @param failures [Enumerable] failure records for the current page
  # @param total [Integer] total number of matching rows
  # @param pages [Integer] total number of pages
  # @return [Hash{Symbol => Object}]
  def tabulator_payload(failures:, total:, pages:)
    routes = Rails.application.routes.url_helpers

    rows = failures.map do |failure|
      kind = failure_scope(failure)

      {
        id: failure.id,
        occurred_at: failure.occurred_at&.iso8601,
        job_scope: kind,
        context_label: failure_context_label(failure, scope: kind),
        instagram_account_id: failure.instagram_account_id,
        instagram_profile_id: failure.instagram_profile_id,
        job_class: failure.job_class,
        queue_name: failure.queue_name,
        failure_kind: failure.failure_kind,
        retryable: failure.retryable_now?,
        error_class: failure.error_class,
        error_message: failure.error_message,
        open_url: routes.admin_background_job_failure_path(failure),
        retry_url: routes.admin_retry_background_job_failure_path(failure)
      }
    end

    { data: rows, last_page: pages, last_row: total }
  end
  # Classifies a failure by the most specific record it is tied to.
  #
  # @param failure [#instagram_profile_id, #instagram_account_id]
  # @return [String] "profile", "account" or "system"
  def failure_scope(failure)
    if failure.instagram_profile_id.present?
      "profile"
    elsif failure.instagram_account_id.present?
      "account"
    else
      "system"
    end
  end
  # Human-readable label describing what a failure belongs to.
  #
  # @param failure [#instagram_profile_id, #instagram_account_id]
  # @param scope [String] "profile", "account", or anything else for system-level
  # @return [String]
  def failure_context_label(failure, scope:)
    if "profile" === scope
      "Profile ##{failure.instagram_profile_id} (Account ##{failure.instagram_account_id || '?'})"
    elsif "account" === scope
      "Account ##{failure.instagram_account_id}"
    else
      "System"
    end
  end
  # Reads the sorter list sent with the request.
  #
  # The list is taken from params[:sorters], falling back to params[:sort]. A
  # JSON string is parsed (and discarded unless it decodes to an Array), an
  # Array is returned as is, and an indexed parameter hash contributes its
  # values. Anything else, or any error, yields an empty list.
  #
  # @return [Array]
  def extract_tabulator_sorters
    raw = params[:sorters].presence || params[:sort]
    return [] unless raw.present?

    if raw.is_a?(String)
      decoded = JSON.parse(raw)
      decoded.is_a?(Array) ? decoded : []
    elsif raw.is_a?(Array)
      raw
    elsif raw.is_a?(ActionController::Parameters)
      raw.to_unsafe_h.values
    else
      []
    end
  rescue StandardError
    []
  end
  # Empties every Sidekiq queue plus the scheduled, retry and dead sets, then
  # asks each registered Sidekiq process to go quiet.
  #
  # Quieting does not terminate a process; it only asks it to stop picking up
  # new work.
  #
  # NOTE(review): Sidekiq::Process exposes quiet!/stop!/stopping? but, as far as
  # the public API goes, no alive? predicate, so the previous `process.alive?`
  # guard would raise NoMethodError. Processes that are already quiet are
  # skipped via stopping? instead — confirm against the bundled Sidekiq version.
  #
  # @return [void]
  def clear_sidekiq_jobs!
    # Loaded lazily so the API is only required when this action actually runs.
    require "sidekiq/api"

    # Enqueued jobs in every queue.
    Sidekiq::Queue.all.each(&:clear)

    # Jobs waiting on a schedule, a retry, or parked as dead.
    Sidekiq::ScheduledSet.new.clear
    Sidekiq::RetrySet.new.clear
    Sidekiq::DeadSet.new.clear

    # Ask the processes to stop fetching new jobs.
    Sidekiq::ProcessSet.new.each do |process|
      process.quiet! unless process.stopping?
    end
  end
  # Removes all Solid Queue state: every execution table first, then the jobs
  # themselves, and finally the registered process rows.
  #
  # @return [void]
  def clear_solid_queue_jobs!
    execution_models = [
      SolidQueue::ReadyExecution,
      SolidQueue::ScheduledExecution,
      SolidQueue::ClaimedExecution,
      SolidQueue::BlockedExecution,
      SolidQueue::FailedExecution
    ]

    # Executions go first, then the jobs they belong to.
    execution_models.each(&:delete_all)
    SolidQueue::Job.delete_all

    # Drop the process registry rows as well.
    SolidQueue::Process.delete_all
  end
  779. end

app/controllers/admin/base_controller.rb

0.0% lines covered

100.0% branches covered

17 relevant lines. 0 lines covered and 17 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Base class for admin controllers; gates every action behind optional HTTP
  # Basic authentication.
  class Admin::BaseController < ApplicationController
    before_action :require_admin!

    private

    # Enforces admin authentication using credentials read from the encrypted
    # Rails credentials (admin.user / admin.password), falling back to the
    # ADMIN_USER / ADMIN_PASSWORD environment variables.
    #
    # * Neither value configured: the request is allowed through.
    # * Only one configured: responds 503 with an explanatory message.
    # * Both configured: HTTP Basic auth is required.
    def require_admin!
      expected_user = Rails.application.credentials.dig(:admin, :user).presence || ENV["ADMIN_USER"].to_s
      expected_pass = Rails.application.credentials.dig(:admin, :password).presence || ENV["ADMIN_PASSWORD"].to_s

      user_missing = expected_user.blank?
      pass_missing = expected_pass.blank?

      # If no creds are configured, leave admin pages open for easier setup.
      # You can enable auth later by setting both credentials/admin env vars.
      return if user_missing && pass_missing

      if user_missing || pass_missing
        render plain: "Admin credentials are partially configured. Set both user and password, or clear both to disable auth.", status: :service_unavailable
        return
      end

      authenticate_or_request_with_http_basic("Admin") do |given_user, given_pass|
        # Both comparisons always run (non-short-circuit &), so the time taken
        # does not reveal which of the two values was wrong.
        user_matches = ActiveSupport::SecurityUtils.secure_compare(given_user.to_s, expected_user.to_s)
        pass_matches = ActiveSupport::SecurityUtils.secure_compare(given_pass.to_s, expected_pass.to_s)
        user_matches & pass_matches
      end
    end
  end

app/controllers/admin/issues_controller.rb

0.0% lines covered

100.0% branches covered

169 relevant lines. 0 lines covered and 169 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Admin screen for application issues: a filterable, sortable, paginated
  # listing (HTML or Tabulator-style JSON), status updates, and re-queueing of
  # the background job linked to an issue.
  class Admin::IssuesController < Admin::BaseController
    # Lists issues. Supports column filters, a free-text +q+ search across
    # title/details/issue_type/source, a single remote sorter, and paging via
    # +page+ and +per_page+ (or +size+), with the page size clamped to 10..200.
    def index
      scope = AppIssue.includes(:background_job_failure).recent_first
      scope = apply_tabulator_filters(scope)

      q = params[:q].to_s.strip
      if q.present?
        term = "%#{q.downcase}%"
        scope = scope.where(
          "LOWER(title) LIKE ? OR LOWER(COALESCE(details, '')) LIKE ? OR LOWER(issue_type) LIKE ? OR LOWER(source) LIKE ?",
          term, term, term, term
        )
      end

      scope = apply_remote_sort(scope) || scope

      page = params.fetch(:page, 1).to_i
      page = 1 if page < 1
      per_page = (params[:per_page].presence || params[:size].presence || 50).to_i.clamp(10, 200)

      total = scope.count
      pages = (total / per_page.to_f).ceil
      @issues = scope.offset((page - 1) * per_page).limit(per_page)

      respond_to do |format|
        format.html
        format.json { render json: tabulator_payload(issues: @issues, total: total, pages: pages) }
      end
    end

    # Moves an issue to the "open", "pending" or "resolved" status, recording
    # the optional resolution notes. Any failure (unknown status, missing
    # record, model error) is reported back as an alert / 422 JSON error.
    def update
      issue = AppIssue.find(params[:id])
      status = params[:status].to_s
      notes = params[:resolution_notes].to_s

      case status
      when "open"
        issue.mark_open!(notes: notes)
      when "pending"
        issue.mark_pending!(notes: notes)
      when "resolved"
        issue.mark_resolved!(notes: notes)
      else
        raise ArgumentError, "Unsupported status: #{status}"
      end

      respond_to do |format|
        format.html { redirect_to admin_issues_path, notice: "Issue ##{issue.id} updated." }
        format.json { render json: { ok: true, id: issue.id, status: issue.status } }
      end
    rescue StandardError => e
      respond_to do |format|
        format.html { redirect_to admin_issues_path, alert: "Unable to update issue: #{e.message}" }
        format.json { render json: { ok: false, error: e.message }, status: :unprocessable_entity }
      end
    end

    # Re-enqueues the failed background job linked to the issue and marks the
    # issue as pending. Retry errors are reported as an alert / 422 JSON error.
    def retry_job
      issue = AppIssue.find(params[:id])
      failure = issue.background_job_failure
      raise Jobs::FailureRetry::RetryError, "Issue is not linked to a failed background job" unless failure

      Jobs::FailureRetry.enqueue!(failure)
      issue.mark_pending!(notes: "Retry queued at #{Time.current.iso8601}.")

      respond_to do |format|
        format.html { redirect_to admin_issues_path, notice: "Retry queued for issue ##{issue.id}." }
        format.json { render json: { ok: true } }
      end
    rescue Jobs::FailureRetry::RetryError => e
      respond_to do |format|
        format.html { redirect_to admin_issues_path, alert: e.message }
        format.json { render json: { ok: false, error: e.message }, status: :unprocessable_entity }
      end
    end

    private

    # Narrows +scope+ by the column filters sent with the request: status and
    # severity match exactly, issue_type and source match as case-insensitive
    # substrings. Values are stringified first because a JSON-encoded filter
    # list may carry non-String values, which do not respond to downcase.
    def apply_tabulator_filters(scope)
      extract_tabulator_filters.each do |f|
        field = f[:field]
        value = f[:value]
        next if value.blank?

        case field
        when "status"
          scope = scope.where(status: value.to_s)
        when "severity"
          scope = scope.where(severity: value.to_s)
        when "issue_type"
          scope = scope.where("LOWER(issue_type) LIKE ?", "%#{value.to_s.downcase}%")
        when "source"
          scope = scope.where("LOWER(source) LIKE ?", "%#{value.to_s.downcase}%")
        end
      end

      scope
    end

    # Normalizes the filter list (params[:filters] or params[:filter]; JSON
    # string, Array, or indexed parameter hash) into { field:, value: } hashes.
    # Returns [] when nothing usable was sent or when parsing fails.
    def extract_tabulator_filters
      raw = params[:filters].presence || params[:filter]
      return [] unless raw.present?

      entries =
        case raw
        when String
          JSON.parse(raw)
        when Array
          raw
        when ActionController::Parameters
          raw.to_unsafe_h.values
        else
          []
        end

      Array(entries).filter_map do |item|
        # to_h raises on unpermitted ActionController::Parameters, which the
        # rescue below would turn into "no filters"; prefer to_unsafe_h.
        h =
          if item.respond_to?(:to_unsafe_h)
            item.to_unsafe_h
          elsif item.respond_to?(:to_h)
            item.to_h
          else
            {}
          end

        field = h["field"].to_s
        next if field.blank?

        { field: field, value: h["value"] }
      end
    rescue StandardError
      []
    end

    # Applies the first requested sorter, or returns nil when none is
    # supported. The direction is reduced to the literals ASC/DESC before
    # interpolation. +reorder+ is used so the ordering already on the scope
    # (index starts from recent_first) does not outrank the requested column.
    def apply_remote_sort(scope)
      sorters = extract_tabulator_sorters
      return nil unless sorters.is_a?(Array)

      first = sorters.first
      return nil unless first.respond_to?(:[])

      field = first["field"].to_s
      dir = first["dir"].to_s.downcase == "desc" ? "DESC" : "ASC"

      case field
      when "last_seen_at"
        scope.reorder(Arel.sql("last_seen_at #{dir}, id #{dir}"))
      when "severity"
        scope.reorder(Arel.sql("severity #{dir}, last_seen_at DESC, id DESC"))
      when "status"
        scope.reorder(Arel.sql("status #{dir}, last_seen_at DESC, id DESC"))
      when "occurrences"
        scope.reorder(Arel.sql("occurrences #{dir}, last_seen_at DESC, id DESC"))
      else
        nil
      end
    end

    # Reads the sorter list (params[:sorters] or params[:sort]) as an Array.
    # Returns [] for unsupported shapes or on any parsing error.
    def extract_tabulator_sorters
      raw = params[:sorters].presence || params[:sort]
      return [] unless raw.present?

      case raw
      when String
        parsed = JSON.parse(raw)
        parsed.is_a?(Array) ? parsed : []
      when Array
        raw
      when ActionController::Parameters
        raw.to_unsafe_h.values
      else
        []
      end
    rescue StandardError
      []
    end

    # Builds the JSON body for the issues table: one row hash per issue plus
    # the paging totals under :last_page and :last_row.
    def tabulator_payload(issues:, total:, pages:)
      routes = Rails.application.routes.url_helpers

      data = issues.map do |issue|
        failure = issue.background_job_failure

        {
          id: issue.id,
          title: issue.title,
          issue_type: issue.issue_type,
          source: issue.source,
          severity: issue.severity,
          status: issue.status,
          details: issue.details.to_s,
          occurrences: issue.occurrences.to_i,
          first_seen_at: issue.first_seen_at&.iso8601,
          last_seen_at: issue.last_seen_at&.iso8601,
          instagram_account_id: issue.instagram_account_id,
          instagram_profile_id: issue.instagram_profile_id,
          retryable: issue.retryable?,
          failure_url: failure ? routes.admin_background_job_failure_path(failure) : nil,
          update_url: routes.admin_issue_path(issue),
          retry_url: routes.retry_job_admin_issue_path(issue)
        }
      end

      { data: data, last_page: pages, last_row: total }
    end
  end

app/controllers/admin/storage_ingestions_controller.rb

0.0% lines covered

100.0% branches covered

131 relevant lines. 0 lines covered and 131 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Admin listing of Active Storage ingestion records, as HTML or as a
  # Tabulator-style JSON page with remote filtering, sorting and paging.
  class Admin::StorageIngestionsController < Admin::BaseController
    # Lists ingestions. Paging uses +page+ and +per_page+ (or +size+), with the
    # page size clamped to 10..200.
    def index
      scope = ActiveStorageIngestion.includes(:blob).recent_first
      scope = apply_tabulator_filters(scope)
      scope = apply_remote_sort(scope) || scope

      page = params.fetch(:page, 1).to_i
      page = 1 if page < 1
      per_page = (params[:per_page].presence || params[:size].presence || 50).to_i.clamp(10, 200)

      total = scope.count
      pages = (total / per_page.to_f).ceil
      @ingestions = scope.offset((page - 1) * per_page).limit(per_page)

      respond_to do |format|
        format.html
        format.json { render json: tabulator_payload(ingestions: @ingestions, total: total, pages: pages) }
      end
    end

    private

    # Narrows +scope+ by case-insensitive substring filters on attachment_name,
    # record_type and created_by_job_class. Values are stringified first
    # because a JSON-encoded filter list may carry non-String values, which do
    # not respond to downcase.
    def apply_tabulator_filters(scope)
      extract_tabulator_filters.each do |f|
        field = f[:field]
        value = f[:value]
        next if value.blank?

        term = "%#{value.to_s.downcase}%"

        case field
        when "attachment_name"
          scope = scope.where("LOWER(attachment_name) LIKE ?", term)
        when "record_type"
          scope = scope.where("LOWER(COALESCE(record_type, '')) LIKE ?", term)
        when "created_by_job_class"
          scope = scope.where("LOWER(COALESCE(created_by_job_class, '')) LIKE ?", term)
        end
      end

      scope
    end

    # Normalizes the filter list (params[:filters] or params[:filter]; JSON
    # string, Array, or indexed parameter hash) into { field:, value: } hashes.
    # Returns [] when nothing usable was sent or when parsing fails.
    def extract_tabulator_filters
      raw = params[:filters].presence || params[:filter]
      return [] unless raw.present?

      entries =
        case raw
        when String
          JSON.parse(raw)
        when Array
          raw
        when ActionController::Parameters
          raw.to_unsafe_h.values
        else
          []
        end

      Array(entries).filter_map do |item|
        # to_h raises on unpermitted ActionController::Parameters, which the
        # rescue below would turn into "no filters"; prefer to_unsafe_h.
        h =
          if item.respond_to?(:to_unsafe_h)
            item.to_unsafe_h
          elsif item.respond_to?(:to_h)
            item.to_h
          else
            {}
          end

        field = h["field"].to_s
        next if field.blank?

        { field: field, value: h["value"] }
      end
    rescue StandardError
      []
    end

    # Applies the first requested sorter, or returns nil when none is
    # supported. The direction is reduced to the literals ASC/DESC before
    # interpolation. +reorder+ is used so the ordering already on the scope
    # (index starts from recent_first) does not outrank the requested column.
    def apply_remote_sort(scope)
      sorters = extract_tabulator_sorters
      return nil unless sorters.is_a?(Array)

      first = sorters.first
      return nil unless first.respond_to?(:[])

      field = first["field"].to_s
      dir = first["dir"].to_s.downcase == "desc" ? "DESC" : "ASC"

      case field
      when "created_at"
        scope.reorder(Arel.sql("created_at #{dir}, id #{dir}"))
      when "blob_byte_size"
        scope.reorder(Arel.sql("blob_byte_size #{dir}, created_at DESC, id DESC"))
      when "record_type"
        scope.reorder(Arel.sql("record_type #{dir} NULLS LAST, created_at DESC, id DESC"))
      else
        nil
      end
    end

    # Reads the sorter list (params[:sorters] or params[:sort]) as an Array.
    # Returns [] for unsupported shapes or on any parsing error.
    def extract_tabulator_sorters
      raw = params[:sorters].presence || params[:sort]
      return [] unless raw.present?

      case raw
      when String
        parsed = JSON.parse(raw)
        parsed.is_a?(Array) ? parsed : []
      when Array
        raw
      when ActionController::Parameters
        raw.to_unsafe_h.values
      else
        []
      end
    rescue StandardError
      []
    end

    # Builds the JSON body for the ingestions table: one row hash per ingestion
    # plus the paging totals under :last_page and :last_row.
    def tabulator_payload(ingestions:, total:, pages:)
      routes = Rails.application.routes.url_helpers

      data = ingestions.map do |row|
        blob = row.blob

        {
          id: row.id,
          created_at: row.created_at&.iso8601,
          attachment_name: row.attachment_name,
          record_type: row.record_type,
          record_id: row.record_id,
          blob_filename: row.blob_filename,
          blob_content_type: row.blob_content_type,
          blob_byte_size: row.blob_byte_size,
          created_by_job_class: row.created_by_job_class,
          created_by_active_job_id: row.created_by_active_job_id,
          queue_name: row.queue_name,
          instagram_account_id: row.instagram_account_id,
          instagram_profile_id: row.instagram_profile_id,
          # A row without a blob gets no download link rather than failing
          # the whole page.
          blob_url: blob ? routes.rails_blob_path(blob, disposition: "attachment", only_path: true) : nil,
          record_url: record_url_for(row)
        }
      end

      { data: data, last_page: pages, last_row: total }
    end

    # Path to the page of the record the ingestion is attached to, for the
    # record types that have one; nil otherwise or if URL generation fails.
    def record_url_for(row)
      routes = Rails.application.routes.url_helpers

      case row.record_type
      when "InstagramAccount"
        routes.instagram_account_path(row.record_id)
      when "InstagramProfile"
        routes.instagram_profile_path(row.record_id)
      when "InstagramPost"
        routes.instagram_post_path(row.record_id)
      else
        nil
      end
    rescue StandardError
      nil
    end
  end

app/controllers/ai_dashboard_controller.rb

0.0% lines covered

100.0% branches covered

344 relevant lines. 0 lines covered and 344 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Dashboard for the local AI stack: shows the cached health status and lets
  # the user fire small smoke tests at the individual AI service endpoints.
  class AiDashboardController < ApplicationController
    before_action :require_current_account!
    skip_forgery_protection only: [:test_service, :test_all_services]

    require 'net/http'
    require 'uri'
    require 'json'
    require 'base64'
    require 'securerandom'

    AI_SERVICE_URL = "http://localhost:8000"

    # Renders the dashboard; pass refresh=true to force a live health check.
    def index
      @service_status = check_ai_services(force: refresh_requested?)
      @test_results = {}
    end

    # Runs one smoke test, selected by params[:service_name] and
    # params[:test_type], and reports the result as JSON or a flash notice.
    def test_service
      service_name = params[:service_name]
      test_type = params[:test_type]

      @test_results =
        case service_name
        when 'vision'
          test_vision_service(test_type)
        when 'face'
          test_face_service(test_type)
        when 'ocr'
          test_ocr_service(test_type)
        when 'whisper'
          test_whisper_service(test_type)
        when 'video'
          test_video_service(test_type)
        else
          { error: "Unknown service: #{service_name}" }
        end

      respond_to do |format|
        format.json { render json: @test_results }
        format.html {
          flash[:notice] = "Test completed for #{service_name}"
          redirect_to ai_dashboard_path
        }
      end
    end

    # Runs the default smoke test of every service and reports all results.
    def test_all_services
      @test_results = {}
      @test_results[:vision] = test_vision_service('labels')
      @test_results[:face] = test_face_service('detection')
      @test_results[:ocr] = test_ocr_service('text_extraction')
      @test_results[:whisper] = test_whisper_service('transcription')
      @test_results[:video] = test_video_service('analysis')

      respond_to do |format|
        format.json { render json: @test_results }
        format.html {
          flash[:notice] = "All services tested"
          redirect_to ai_dashboard_path
        }
      end
    end

    private

    # Returns the status hash shown on the dashboard. With force: true a live
    # check is performed; otherwise the stored status is used and a background
    # refresh is requested when that status is stale or unhealthy.
    def check_ai_services(force: false)
      health = if force
        Ops::LocalAiHealth.check(force: true)
      else
        Ops::LocalAiHealth.status
      end
      enqueue_health_refresh_if_needed(health: health) unless force

      checked_at = parse_health_checked_at(health[:checked_at])
      stale = ActiveModel::Type::Boolean.new.cast(health[:stale])

      if ActiveModel::Type::Boolean.new.cast(health[:ok])
        service_map = health.dig(:details, :microservice, :services) || {}
        service_map = service_map.merge(
          "ollama" => Array(health.dig(:details, :ollama, :models)).any?
        )

        {
          status: "online",
          services: service_map,
          stale: stale,
          source: health[:source].to_s,
          last_check: checked_at
        }
      else
        message = health[:error].presence || "Local AI stack unavailable"

        {
          status: "offline",
          message: message,
          stale: stale,
          source: health[:source].to_s,
          last_check: checked_at
        }
      end
    end

    # True when the request asks for a forced health refresh.
    def refresh_requested?
      ActiveModel::Type::Boolean.new.cast(params[:refresh])
    end

    # Enqueues a health-check job when the status is stale or unhealthy, at
    # most once per 45 seconds (tracked through a cache key). Errors are
    # swallowed so the dashboard still renders.
    def enqueue_health_refresh_if_needed(health:)
      stale = ActiveModel::Type::Boolean.new.cast(health[:stale])
      unhealthy = !ActiveModel::Type::Boolean.new.cast(health[:ok])
      return unless stale || unhealthy

      throttle_key = "ops:local_ai_health:refresh_enqueued"
      return if Rails.cache.read(throttle_key)

      job = CheckAiMicroserviceHealthJob.perform_later
      Rails.cache.write(throttle_key, job.job_id, expires_in: 45.seconds)
    rescue StandardError
      nil
    end

    # Parses the ISO 8601 timestamp of the last health check, falling back to
    # the current time when it is blank or malformed.
    def parse_health_checked_at(value)
      text = value.to_s.strip
      return Time.current if text.blank?

      Time.iso8601(text)
    rescue StandardError
      Time.current
    end

    # Smoke-tests label detection by posting the test image to /analyze/image.
    def test_vision_service(test_type)
      return unknown_test_type(test_type) unless test_type == 'labels'

      response = post_multipart(
        "/analyze/image",
        fields: { "features" => "labels" },
        files: [["file", "test.png", create_test_image]]
      )
      return http_failure(response) unless response.code == '200'

      data = JSON.parse(response.body)
      labels = data['results']['labels'] || []
      {
        success: true,
        result: labels,
        message: "Label detection working - found #{labels.length} objects"
      }
    rescue StandardError => e
      { success: false, error: e.message }
    end

    # Smoke-tests the face endpoints: 'detection', 'embedding' or 'comparison'.
    def test_face_service(test_type)
      case test_type
      when 'detection'
        response = post_multipart(
          "/analyze/image",
          fields: { "features" => "faces" },
          files: [["file", "test.png", create_test_image]]
        )
        return http_failure(response) unless response.code == '200'

        data = JSON.parse(response.body)
        faces = data['results']['faces'] || []
        {
          success: true,
          result: faces,
          message: "Face detection working - found #{faces.length} face(s)"
        }
      when 'embedding'
        response = post_multipart("/face/embedding", files: [["file", "test.png", create_test_image]])
        return http_failure(response) unless response.code == '200'

        data = JSON.parse(response.body)
        embedding_size = data['metadata']['embedding_size'] || 0
        {
          success: true,
          result: data['embedding'] ? "Embedding generated (size: #{embedding_size})" : nil,
          message: "Face embedding working - generated #{embedding_size}-dimensional vector"
        }
      when 'comparison'
        test_image = create_test_image
        response = post_multipart(
          "/face/compare",
          files: [["file1", "test1.png", test_image], ["file2", "test2.png", test_image]]
        )
        return http_failure(response) unless response.code == '200'

        data = JSON.parse(response.body)
        similarity = data['similarity'] || 0
        {
          success: true,
          result: data,
          message: "Face comparison working - similarity score: #{similarity.round(3)}"
        }
      else
        unknown_test_type(test_type)
      end
    rescue StandardError => e
      { success: false, error: e.message }
    end

    # Smoke-tests OCR text extraction via /analyze/image.
    def test_ocr_service(test_type)
      return unknown_test_type(test_type) unless test_type == 'text_extraction'

      response = post_multipart(
        "/analyze/image",
        fields: { "features" => "text" },
        files: [["file", "test.png", create_test_image_with_text]]
      )
      return http_failure(response) unless response.code == '200'

      data = JSON.parse(response.body)
      regions = data['results']['text'] || []
      extracted_text = regions.map { |t| t['text'] }.join(', ')
      # Keep the message short: at most 48 characters of text plus an ellipsis.
      preview = extracted_text.length > 50 ? extracted_text[0..47] + '...' : extracted_text
      {
        success: true,
        result: regions,
        message: "OCR text extraction working - found #{regions.length} text region(s): #{preview}"
      }
    rescue StandardError => e
      { success: false, error: e.message }
    end

    # Checks that the transcription endpoint is reachable. No audio is sent, so
    # a 400/422 rejection counts as "service is up".
    def test_whisper_service(test_type)
      return unknown_test_type(test_type) unless test_type == 'transcription'

      probe_endpoint("/transcribe/audio", "Whisper service responding")
    rescue StandardError => e
      { success: false, error: e.message }
    end

    # Checks that the video analysis endpoint is reachable. No file is sent, so
    # a 400/422 rejection counts as "service is up".
    def test_video_service(test_type)
      return unknown_test_type(test_type) unless test_type == 'analysis'

      probe_endpoint("/analyze/video", "Video service responding")
    rescue StandardError => e
      { success: false, error: e.message }
    end

    # Posts a multipart/form-data request to the AI service and returns the
    # Net::HTTP response.
    #
    # fields - Hash of plain form field name => value, sent before the files.
    # files  - Array of [field_name, filename, binary_content] triples; every
    #          file is sent as image/png.
    def post_multipart(path, files:, fields: {})
      uri = URI("#{AI_SERVICE_URL}#{path}")
      boundary = "----WebKitFormBoundary#{SecureRandom.hex(16)}"

      chunks = []
      fields.each do |name, value|
        chunks << "--#{boundary}\r\n"
        chunks << "Content-Disposition: form-data; name=\"#{name}\"\r\n\r\n"
        chunks << "#{value}\r\n"
      end
      files.each do |name, filename, content|
        chunks << "--#{boundary}\r\n"
        chunks << "Content-Disposition: form-data; name=\"#{name}\"; filename=\"#{filename}\"\r\n"
        chunks << "Content-Type: image/png\r\n\r\n"
        chunks << content
        chunks << "\r\n"
      end
      chunks << "--#{boundary}--\r\n"

      req = Net::HTTP::Post.new(uri)
      req.body = chunks.join
      req['Content-Type'] = "multipart/form-data; boundary=#{boundary}"

      Net::HTTP.start(uri.hostname, uri.port) do |http|
        http.request(req)
      end
    end

    # Sends an empty POST to +path+ and treats a 422 or 400 response as proof
    # that the endpoint is being served.
    def probe_endpoint(path, success_message)
      uri = URI("#{AI_SERVICE_URL}#{path}")
      req = Net::HTTP::Post.new(uri)
      response = Net::HTTP.start(uri.hostname, uri.port) do |http|
        http.request(req)
      end

      if response.code == '422' || response.code == '400'
        {
          success: true,
          result: "Endpoint accessible",
          message: success_message
        }
      else
        { success: false, error: "Unexpected response: #{response.code}" }
      end
    end

    # Result hash for a non-200 response from the AI service.
    def http_failure(response)
      { success: false, error: "HTTP #{response.code}: #{response.body}" }
    end

    # Result hash for a test type the service test does not know.
    def unknown_test_type(test_type)
      { success: false, error: "Unknown test type: #{test_type}" }
    end

    # Returns the bytes of a 1x1 transparent PNG used as the test upload.
    def create_test_image
      # Base64 encoded 1x1 transparent PNG
      png_data = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=="
      Base64.decode64(png_data)
    end

    # Image used for the OCR test. For now this is the same blank test image;
    # a real OCR check would need an image that actually contains text.
    def create_test_image_with_text
      create_test_image
    end
  end

app/controllers/application_controller.rb

0.0% lines covered

100.0% branches covered

22 relevant lines. 0 lines covered and 22 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Root controller: resolves the Instagram account the current request works
  # with and offers a guard for actions that need one.
  class ApplicationController < ActionController::Base
    private

    # Returns the account for this request, memoized per controller instance
    # (a nil result is memoized too).
    #
    # Resolution order:
    # 1. the account whose id is stored in session[:instagram_account_id];
    # 2. the account with the lowest id;
    # 3. a newly created account for the configured bootstrap username, when
    #    no account exists and that username is set.
    def current_account
      return @current_account if defined?(@current_account)

      # Prefer an explicitly selected account (multi-account support).
      chosen_id = session[:instagram_account_id]
      @current_account = chosen_id.present? ? InstagramAccount.find_by(id: chosen_id) : nil

      # Fallback to the first account if none selected.
      @current_account ||= InstagramAccount.order(:id).first
      return @current_account unless @current_account.nil?

      # Optional bootstrap for older single-account setups.
      bootstrap_username = Rails.application.config.x.instagram.username.to_s.strip
      unless bootstrap_username.blank?
        @current_account = InstagramAccount.create!(username: bootstrap_username)
      end

      @current_account
    end
    helper_method :current_account

    # before_action guard: sends the user to the accounts page when no account
    # can be resolved.
    def require_current_account!
      unless current_account.present?
        redirect_to instagram_accounts_path, alert: "Add an Instagram account first."
      end
    end
  end

app/controllers/concerns/profile_post_preview_support.rb

0.0% lines covered

100.0% branches covered

41 relevant lines. 0 lines covered and 41 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Controller concern that picks a preview image URL for a profile post and
  # exposes that lookup to views as a helper method.
  module ProfilePostPreviewSupport
    extend ActiveSupport::Concern

    # Lifetime of the cache entry that keeps preview generation from being
    # enqueued again for the same post.
    PROFILE_POST_PREVIEW_ENQUEUE_TTL_SECONDS = 30.minutes

    included do
      helper_method :preferred_profile_post_preview_image_url
    end

    private

    # Picks the preview URL for +post+, in order of preference:
    #   1. the attached preview_image blob path;
    #   2. the first present URL among the known metadata keys;
    #   3. a locally generated preview representation of the attached video.
    #
    # @param post [Object] record exposing preview_image and media attachments
    # @param metadata [Hash, Object] anything that is not a Hash is ignored
    # @return [String, nil] URL or path, or nil when nothing is available
    def preferred_profile_post_preview_image_url(post:, metadata:)
      if post.preview_image.attached?
        return Rails.application.routes.url_helpers.rails_blob_path(post.preview_image, only_path: true)
      end

      data = metadata.is_a?(Hash) ? metadata : {}
      direct_url = data.values_at("preview_image_url", "poster_url", "image_url", "media_url_image").find(&:present?)
      return direct_url.to_s if direct_url.present?

      local_profile_post_preview_representation_url(post: post)
    end

    # Builds a URL for a 640x640-limited preview of the attached media, but only
    # when the media is attached and its content type starts with "video/".
    # Also triggers (throttled) background generation of a stored preview image.
    #
    # Best effort: any StandardError is logged and nil is returned, so a broken
    # preview does not fail the caller.
    #
    # @return [String, nil]
    def local_profile_post_preview_representation_url(post:)
      return nil unless post.media.attached?
      return nil unless post.media.blob&.content_type.to_s.start_with?("video/")

      enqueue_profile_post_preview_generation(post: post)
      view_context.url_for(post.media.preview(resize_to_limit: [ 640, 640 ]))
    rescue StandardError => e
      # Previously swallowed silently; log it so preview failures can be diagnosed.
      Rails.logger.warn("[profile_post_preview] preview url failed post_id=#{post&.id}: #{e.class}: #{e.message}")
      nil
    end

    # Enqueues GenerateProfilePostPreviewImageJob for +post+ unless a preview
    # image is already attached. Rails.cache.fetch runs the enqueue only when
    # the per-post key is missing, which limits it to once per TTL window.
    # Failures are logged and swallowed.
    def enqueue_profile_post_preview_generation(post:)
      return if post.preview_image.attached?

      cache_key = "profile_post:preview_enqueue:#{post.id}"
      Rails.cache.fetch(cache_key, expires_in: PROFILE_POST_PREVIEW_ENQUEUE_TTL_SECONDS) do
        GenerateProfilePostPreviewImageJob.perform_later(instagram_profile_post_id: post.id)
        true
      end
    rescue StandardError => e
      Rails.logger.warn("[profile_post_preview] preview enqueue failed post_id=#{post.id}: #{e.class}: #{e.message}")
      nil
    end
  end

app/controllers/feed_captures_controller.rb

0.0% lines covered

100.0% branches covered

37 relevant lines. 0 lines covered and 37 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Queues a home-feed capture job for the current account.
  class FeedCapturesController < ApplicationController
    before_action :require_current_account!

    # Enqueues CaptureHomeFeedJob with request-supplied tuning values, each
    # coerced to an integer and clamped to a fixed range. The outcome is
    # reported as an HTML redirect, a Turbo Stream notification, or JSON.
    def create
      rounds = bounded_integer_param(:rounds, 4, 1, 25)
      delay_seconds = bounded_integer_param(:delay_seconds, 45, 10, 120)
      max_new = bounded_integer_param(:max_new, 20, 1, 200)

      CaptureHomeFeedJob.perform_later(
        instagram_account_id: current_account.id,
        rounds: rounds,
        delay_seconds: delay_seconds,
        max_new: max_new
      )

      respond_to do |fmt|
        fmt.html { redirect_back fallback_location: instagram_account_path(current_account), notice: "Feed capture queued." }
        fmt.turbo_stream { render turbo_stream: notification_stream(kind: "notice", message: "Feed capture queued.") }
        fmt.json { head :accepted }
      end
    rescue StandardError => error
      failure_message = "Unable to queue feed capture: #{error.message}"
      respond_to do |fmt|
        fmt.html { redirect_back fallback_location: instagram_account_path(current_account), alert: failure_message }
        fmt.turbo_stream { render turbo_stream: notification_stream(kind: "alert", message: failure_message) }
        fmt.json { render json: { error: error.message }, status: :unprocessable_entity }
      end
    end

    private

    # Reads params[key] (or +default+ when absent), coerces it with to_i and
    # clamps it to lower..upper.
    def bounded_integer_param(key, default, lower, upper)
      params.fetch(key, default).to_i.clamp(lower, upper)
    end

    # Turbo Stream that appends a shared/notification partial to "notifications".
    def notification_stream(kind:, message:)
      turbo_stream.append(
        "notifications",
        partial: "shared/notification",
        locals: { kind: kind, message: message }
      )
    end
  end

app/controllers/follow_graph_syncs_controller.rb

0.0% lines covered

100.0% branches covered

37 relevant lines. 0 lines covered and 37 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Starts a follow-graph sync for the current account.
  class FollowGraphSyncsController < ApplicationController
    before_action :require_current_account!

    # Creates a "queued" follow_graph sync run, enqueues SyncFollowGraphJob for
    # it, and reports the outcome as an HTML redirect, Turbo Streams
    # (notification plus refreshed sync status), or JSON.
    def create
      run = current_account.sync_runs.create!(kind: "follow_graph", status: "queued")
      SyncFollowGraphJob.perform_later(instagram_account_id: current_account.id, sync_run_id: run.id)

      queued_message = "Follow graph sync queued. You will be notified when it completes."
      respond_to do |fmt|
        fmt.html { redirect_to instagram_profiles_path, notice: queued_message }
        fmt.turbo_stream do
          status_stream = turbo_stream.replace("sync_status", partial: "sync_runs/status", locals: { sync_run: run })
          render turbo_stream: [ notification_stream(kind: "notice", message: queued_message), status_stream ]
        end
        fmt.json { head :accepted }
      end
    rescue StandardError => error
      failure_message = "Unable to queue follow graph sync: #{error.message}"
      respond_to do |fmt|
        fmt.html { redirect_to instagram_profiles_path, alert: failure_message }
        fmt.turbo_stream { render turbo_stream: notification_stream(kind: "alert", message: failure_message) }
        fmt.json { render json: { error: error.message }, status: :unprocessable_entity }
      end
    end

    private

    # Turbo Stream that appends a shared/notification partial to "notifications".
    def notification_stream(kind:, message:)
      turbo_stream.append(
        "notifications",
        partial: "shared/notification",
        locals: { kind: kind, message: message }
      )
    end
  end

app/controllers/instagram_accounts_controller.rb

0.0% lines covered

100.0% branches covered

319 relevant lines. 0 lines covered and 319 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Manages Instagram accounts: CRUD, login/cookie handling, and queueing of
  # background sync jobs. Queueing actions answer in three formats: an HTML
  # redirect with a flash, a Turbo Stream notification, or JSON.
  class InstagramAccountsController < ApplicationController
    STORY_SYNC_LIMIT = SyncHomeStoryCarouselJob::STORY_BATCH_LIMIT
    CONTINUOUS_STORY_SYNC_CYCLE_LIMIT = SyncAllHomeStoriesJob::MAX_CYCLES
    # Elapsed-time threshold (ms) at which story_media_archive requests are logged as slow.
    STORY_ARCHIVE_SLOW_REQUEST_MS = Integer(ENV.fetch("STORY_ARCHIVE_SLOW_REQUEST_MS", "2000"))

    before_action :set_account, only: %i[
      show update destroy select manual_login import_cookies export_cookies validate_session
      sync_next_profiles sync_profile_stories sync_stories_with_comments
      sync_all_stories_continuous story_media_archive generate_llm_comment technical_details
      run_continuous_processing
    ]
    before_action :normalize_navigation_format, only: %i[show]
    around_action :log_story_media_archive_request, only: %i[story_media_archive]

    # Lists all accounts together with system-level metrics.
    def index
      @accounts = InstagramAccount.order(:id).to_a
      @metrics = Ops::Metrics.system
    end

    # Dashboard for one account. Selects the account in the session when none
    # is selected yet.
    def show
      session[:instagram_account_id] = @account.id if session[:instagram_account_id].blank?
      snapshot = InstagramAccounts::DashboardSnapshotService.new(account: @account).call
      @issues = snapshot[:issues]
      @metrics = snapshot[:metrics]
      @latest_sync_run = snapshot[:latest_sync_run]
      @recent_failures = snapshot[:recent_failures]
      @recent_audit_entries = snapshot[:recent_audit_entries]
      @actions_todo_queue = snapshot[:actions_todo_queue]
      @skip_diagnostics = snapshot[:skip_diagnostics]
    end

    # Creates an account from the submitted username and selects it.
    def create
      username = params.dig(:instagram_account, :username).to_s.strip
      raise "Username cannot be blank" if username.blank?

      account = InstagramAccount.create!(username: username)
      session[:instagram_account_id] = account.id
      redirect_to instagram_account_path(account), notice: "Account added."
    rescue StandardError => e
      redirect_to instagram_accounts_path, alert: "Unable to add account: #{e.message}"
    end

    def update
      if @account.update(account_params)
        redirect_to instagram_account_path(@account), notice: "Account updated."
      else
        redirect_to instagram_account_path(@account), alert: @account.errors.full_messages.to_sentence
      end
    end

    # Destroys the account and clears the session selection if it pointed at it.
    def destroy
      @account.destroy!
      session[:instagram_account_id] = nil if session[:instagram_account_id].to_i == @account.id
      redirect_to instagram_accounts_path, notice: "Account removed."
    rescue StandardError => e
      redirect_to instagram_account_path(@account), alert: "Unable to remove account: #{e.message}"
    end

    def select
      session[:instagram_account_id] = @account.id
      redirect_to instagram_account_path(@account), notice: "Selected #{@account.username}.", status: :see_other
    end

    # Runs the client's manual login and marks the account authenticated.
    def manual_login
      Instagram::Client.new(account: @account).manual_login!(timeout_seconds: timeout_seconds)
      @account.update!(login_state: "authenticated")
      redirect_to instagram_account_path(@account), notice: "Manual login completed and session bundle saved."
    rescue StandardError => e
      redirect_to instagram_account_path(@account), alert: "Manual login failed: #{e.message}"
    end

    # Stores cookies parsed from params[:cookies_json] and marks the account
    # authenticated. Malformed JSON gets its own alert message.
    def import_cookies
      payload = params[:cookies_json].to_s
      parsed = JSON.parse(payload)
      @account.cookies = parsed
      @account.login_state = "authenticated"
      @account.save!
      redirect_to instagram_account_path(@account), notice: "Cookies imported successfully."
    rescue JSON::ParserError
      redirect_to instagram_account_path(@account), alert: "Invalid JSON format for cookies."
    rescue StandardError => e
      redirect_to instagram_account_path(@account), alert: "Cookie import failed: #{e.message}"
    end

    # Sends the account's cookies as a pretty-printed JSON download.
    def export_cookies
      send_data(
        JSON.pretty_generate(@account.cookies),
        filename: "instagram_cookies_#{@account.username}.json",
        type: "application/json"
      )
    end

    # Validates the stored session through the client and reports the result.
    # The HTML flash is always a notice; the Turbo Stream kind follows
    # validation_result[:valid].
    def validate_session
      validation_result = Instagram::Client.new(account: @account).validate_session!
      respond_to do |format|
        format.html { redirect_to instagram_account_path(@account), notice: validation_result[:message] }
        format.turbo_stream do
          render turbo_stream: notification_stream(
            kind: validation_result[:valid] ? "notice" : "alert",
            message: validation_result[:message]
          )
        end
        format.json { render json: validation_result }
      end
    rescue StandardError => e
      error_message = "Session validation failed: #{e.message}"
      respond_with_notification(kind: "alert", message: error_message) do
        render json: { valid: false, message: error_message }, status: :unprocessable_entity
      end
    end

    # Queues a sync of the next params[:limit] profiles (default 10, clamped to 1..50).
    def sync_next_profiles
      limit = params.fetch(:limit, 10).to_i.clamp(1, 50)
      SyncNextProfilesForAccountJob.perform_later(instagram_account_id: @account.id, limit: limit)
      respond_job_queued("Queued sync for next #{limit} profiles.")
    rescue StandardError => e
      respond_queue_failed("Unable to queue next-profile sync", e)
    end

    # Queues a story carousel sync with auto_reply_only disabled.
    def sync_profile_stories
      story_limit = requested_story_limit
      SyncHomeStoryCarouselJob.perform_later(
        instagram_account_id: @account.id,
        story_limit: story_limit,
        auto_reply_only: false
      )
      respond_job_queued("Queued next #{story_limit} stories.")
    rescue StandardError => e
      respond_queue_failed("Unable to queue story sync", e)
    end

    # Queues a story carousel sync with auto_reply_only enabled.
    def sync_stories_with_comments
      story_limit = requested_story_limit
      SyncHomeStoryCarouselJob.perform_later(
        instagram_account_id: @account.id,
        story_limit: story_limit,
        auto_reply_only: true
      )
      respond_job_queued("Queued next #{story_limit} stories (auto-reply tag required).")
    rescue StandardError => e
      respond_queue_failed("Unable to queue story sync with comments", e)
    end

    # Queues the continuous all-stories sync job.
    def sync_all_stories_continuous
      SyncAllHomeStoriesJob.perform_later(
        instagram_account_id: @account.id,
        cycle_story_limit: STORY_SYNC_LIMIT
      )
      respond_job_queued("Queued continuous story sync with auto-replies.")
    rescue StandardError => e
      respond_queue_failed("Unable to queue continuous story sync", e)
    end

    # Queues the continuous processing pipeline. Unlike the other queueing
    # actions, the JSON response carries a body ({ status: "queued" }).
    def run_continuous_processing
      trigger_source = params[:trigger_source].to_s.presence || "manual_account_trigger"
      ProcessInstagramAccountContinuouslyJob.perform_later(
        instagram_account_id: @account.id,
        trigger_source: trigger_source
      )
      respond_with_notification(kind: "notice", message: "Queued continuous processing pipeline.") do
        render json: { status: "queued" }, status: :accepted
      end
    rescue StandardError => e
      respond_queue_failed("Unable to queue continuous processing", e)
    end

    # JSON endpoint returning one page of archived story items.
    def story_media_archive
      result = InstagramAccounts::StoryArchiveQuery.new(
        account: @account,
        page: params.fetch(:page, 1),
        per_page: params.fetch(:per_page, 12),
        on: params[:on]
      ).call

      render json: {
        items: result.events.map { |event| InstagramAccounts::StoryArchiveItemSerializer.new(event: event).call },
        page: result.page,
        per_page: result.per_page,
        total: result.total,
        has_more: result.has_more,
        on: result.on&.iso8601
      }
    rescue StandardError => e
      render json: { error: e.message }, status: :unprocessable_entity
    end

    # Delegates to LlmCommentRequestService and renders its payload and status.
    def generate_llm_comment
      result = InstagramAccounts::LlmCommentRequestService.new(
        account: @account,
        event_id: params.require(:event_id),
        provider: params.fetch(:provider, :local),
        model: params[:model].presence,
        status_only: params[:status_only]
      ).call

      render json: result.payload, status: result.status
    end

    # Delegates to TechnicalDetailsPayloadService and renders its payload and status.
    def technical_details
      result = InstagramAccounts::TechnicalDetailsPayloadService.new(
        account: @account,
        event_id: params.require(:event_id)
      ).call

      render json: result.payload, status: result.status
    end

    private

    def set_account
      @account = InstagramAccount.find(params[:id])
    end

    def account_params
      params.require(:instagram_account).permit(:username)
    end

    # Manual-login timeout from params (default 180), clamped to 60..900.
    def timeout_seconds
      params.fetch(:timeout_seconds, 180).to_i.clamp(60, 900)
    end

    # Story batch size from params, clamped to 1..STORY_SYNC_LIMIT.
    def requested_story_limit
      params.fetch(:story_limit, STORY_SYNC_LIMIT).to_i.clamp(1, STORY_SYNC_LIMIT)
    end

    # Turbo Stream that appends a shared/notification partial to "notifications".
    def notification_stream(kind:, message:)
      turbo_stream.append(
        "notifications",
        partial: "shared/notification",
        locals: { kind: kind, message: message }
      )
    end

    # Shared three-format response for account actions.
    #
    # @param kind [String] "notice" or "alert"; used as the flash key for HTML
    #   and as the notification kind for Turbo Stream
    # @param message [String] text shown to the user
    # @yield renders the JSON variant of the response
    def respond_with_notification(kind:, message:, &json_response)
      respond_to do |format|
        format.html { redirect_to(instagram_account_path(@account), kind.to_sym => message) }
        format.turbo_stream { render turbo_stream: notification_stream(kind: kind, message: message) }
        format.json(&json_response)
      end
    end

    # Success response for queueing actions; JSON gets a bodiless :accepted.
    def respond_job_queued(message)
      respond_with_notification(kind: "notice", message: message) { head :accepted }
    end

    # Failure response for queueing actions; the user-facing message is
    # "<summary>: <error message>" and JSON gets { error: ... } with 422.
    def respond_queue_failed(summary, error)
      respond_with_notification(kind: "alert", message: "#{summary}: #{error.message}") do
        render json: { error: error.message }, status: :unprocessable_entity
      end
    end

    # Around filter: times the wrapped action and warns when it takes at least
    # STORY_ARCHIVE_SLOW_REQUEST_MS milliseconds.
    def log_story_media_archive_request
      started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      yield
    ensure
      # Deliberately no `return` here: returning from an ensure clause discards
      # an exception that is propagating out of the action.
      elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000.0).round(1)
      warn_slow_story_media_archive_request(elapsed_ms) if elapsed_ms >= STORY_ARCHIVE_SLOW_REQUEST_MS
    end

    # Emits the slow-request warning together with connection pool statistics.
    def warn_slow_story_media_archive_request(elapsed_ms)
      pool_stats = connection_pool_stats
      Rails.logger.warn(
        "[story_media_archive] slow request " \
        "account_id=#{@account&.id} elapsed_ms=#{elapsed_ms} " \
        "pool_size=#{pool_stats[:size]} pool_busy=#{pool_stats[:busy]} " \
        "pool_waiting=#{pool_stats[:waiting]}"
      )
    end

    # Connection pool stats, or an empty hash when they cannot be read.
    def connection_pool_stats
      ActiveRecord::Base.connection_pool.stat
    rescue StandardError
      {}
    end

    # Treats Turbo Stream requests to #show as plain HTML navigation.
    def normalize_navigation_format
      request.format = :html if request.format.turbo_stream?
    end
  end

app/controllers/instagram_posts_controller.rb

0.0% lines covered

100.0% branches covered

131 relevant lines. 0 lines covered and 131 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Lists and shows the current account's Instagram posts. The index also
  # serves a JSON payload with remote filtering, sorting and pagination for a
  # Tabulator-style table (data / last_page / last_row).
  class InstagramPostsController < ApplicationController
    before_action :require_current_account!

    # Filters by the table filters, the free-text params[:q] (shortcode or
    # author username, case-insensitive substring) and params[:status]; sorts
    # by the requested sorter or newest-detected first; paginates with
    # params[:page] and params[:per_page] / params[:size] (default 50,
    # clamped to 10..200).
    def index
      @account = current_account
      scope = apply_tabulator_filters(@account.instagram_posts)

      @q = params[:q].to_s.strip
      if @q.present?
        term = "%#{@q.downcase}%"
        scope = scope.where("LOWER(shortcode) LIKE ? OR LOWER(COALESCE(author_username,'')) LIKE ?", term, term)
      end
      scope = scope.where(status: params[:status].to_s) if params[:status].present?
      scope = apply_remote_sort(scope) || scope.order(detected_at: :desc, id: :desc)

      page = [ params.fetch(:page, 1).to_i, 1 ].max
      per_page = (params[:per_page].presence || params[:size].presence).to_i
      per_page = 50 if per_page <= 0
      per_page = per_page.clamp(10, 200)

      total = scope.count
      pages = (total / per_page.to_f).ceil
      posts = scope.offset((page - 1) * per_page).limit(per_page)

      respond_to do |format|
        format.html
        format.json { render json: tabulator_payload(posts: posts, total: total, pages: pages) }
      end
    end

    def show
      @account = current_account
      @post = @account.instagram_posts.find(params[:id])
      @latest_analysis = @post.ai_analyses.where(purpose: "post").recent_first.first
    end

    private

    # Applies the recognised table filters (author_username, status, post_kind)
    # to +scope+; unknown fields and blank values are ignored.
    def apply_tabulator_filters(scope)
      extract_tabulator_filters.each do |f|
        value = f[:value]
        next if value.blank?

        case f[:field]
        when "author_username"
          # to_s: JSON filters may carry non-String values (e.g. numbers),
          # which do not respond to downcase.
          term = "%#{value.to_s.downcase}%"
          scope = scope.where("LOWER(COALESCE(author_username,'')) LIKE ?", term)
        when "status"
          scope = scope.where(status: value.to_s)
        when "post_kind"
          scope = scope.where(post_kind: value.to_s)
        end
      end
      scope
    end

    # Normalises params[:filters] / params[:filter] (JSON string, array, or
    # nested parameters) into an array of { field:, value: } hashes.
    # Returns [] when the input is absent or cannot be parsed.
    def extract_tabulator_filters
      raw = params[:filters].presence || params[:filter]
      return [] unless raw.present?

      entries =
        case raw
        when String
          JSON.parse(raw)
        when Array
          raw
        when ActionController::Parameters
          raw.to_unsafe_h.values
        else
          []
        end

      Array(entries).filter_map do |item|
        h = tabulator_entry_hash(item)
        field = h["field"].to_s
        next if field.blank?

        { field: field, value: h["value"] }
      end
    rescue StandardError
      []
    end

    # Converts one filter entry to a Hash. Parameters objects are unwrapped
    # with to_unsafe_h because to_h raises on unpermitted parameters, which
    # used to discard every filter; anything that is not hash-like becomes {}.
    def tabulator_entry_hash(item)
      return item.to_unsafe_h if item.respond_to?(:to_unsafe_h)

      item.is_a?(Hash) ? item : {}
    end

    # Orders +scope+ by the first requested sorter.
    #
    # @return [ActiveRecord::Relation, nil] nil when no supported sorter was
    #   requested, so the caller can fall back to its default order
    def apply_remote_sort(scope)
      sorters = extract_tabulator_sorters
      return nil unless sorters.is_a?(Array)

      first = sorters.first
      # Only hash-like entries can be read by key; Integer or Array entries
      # respond to [] but raise TypeError for a String key.
      return nil unless first.is_a?(Hash) || first.respond_to?(:to_unsafe_h)

      field = first["field"].to_s
      # dir is restricted to the two literals below before being interpolated into SQL.
      dir = first["dir"].to_s.downcase == "desc" ? "DESC" : "ASC"

      case field
      when "detected_at"
        scope.order(Arel.sql("detected_at #{dir}, id #{dir}"))
      when "author_username"
        scope.order(Arel.sql("author_username #{dir} NULLS LAST, detected_at DESC, id DESC"))
      when "status"
        scope.order(Arel.sql("status #{dir}, detected_at DESC, id DESC"))
      end
    end

    # Builds the JSON table payload: one row hash per post plus paging totals.
    def tabulator_payload(posts:, total:, pages:)
      data = posts.map do |p|
        analysis = p.analysis.is_a?(Hash) ? p.analysis : {}
        {
          id: p.id,
          shortcode: p.shortcode,
          post_kind: p.post_kind,
          author_username: p.author_username,
          detected_at: p.detected_at&.iso8601,
          status: p.status,
          relevant: analysis["relevant"],
          author_type: analysis["author_type"],
          permalink: p.permalink,
          media_attached: p.media.attached?,
          open_url: Rails.application.routes.url_helpers.instagram_post_path(p)
        }
      end

      { data: data, last_page: pages, last_row: total }
    end

    # Normalises params[:sorters] / params[:sort] (JSON string, array, or
    # nested parameters) into an array of sorter entries; [] on absent or
    # unparseable input.
    def extract_tabulator_sorters
      raw = params[:sorters].presence || params[:sort]
      return [] unless raw.present?

      case raw
      when String
        parsed = JSON.parse(raw)
        parsed.is_a?(Array) ? parsed : []
      when Array
        raw
      when ActionController::Parameters
        raw.to_unsafe_h.values
      else
        []
      end
    rescue StandardError
      []
    end
  end

app/controllers/instagram_profile_actions_controller.rb

0.0% lines covered

100.0% branches covered

369 relevant lines. 0 lines covered and 369 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. class InstagramProfileActionsController < ApplicationController
  2. before_action :require_current_account!
  # Queues DownloadMissingAvatarsJob for the current account and acknowledges
  # the request as an HTML redirect, a Turbo Stream notification, or a
  # bodiless :accepted for JSON. Any StandardError is reported through the
  # same three formats instead of propagating.
  def download_missing_avatars
    DownloadMissingAvatarsJob.perform_later(instagram_account_id: current_account.id)

    respond_to do |fmt|
      fmt.html { redirect_back fallback_location: instagram_profiles_path, notice: "Avatar sync queued." }
      fmt.turbo_stream do
        queued_locals = { kind: "notice", message: "Avatar sync queued." }
        render turbo_stream: turbo_stream.append("notifications", partial: "shared/notification", locals: queued_locals)
      end
      fmt.json { head :accepted }
    end
  rescue StandardError => error
    failure_message = "Unable to queue avatar sync: #{error.message}"
    respond_to do |fmt|
      fmt.html { redirect_back fallback_location: instagram_profiles_path, alert: failure_message }
      fmt.turbo_stream do
        failure_locals = { kind: "alert", message: failure_message }
        render turbo_stream: turbo_stream.append("notifications", partial: "shared/notification", locals: failure_locals)
      end
      fmt.json { render json: { error: error.message }, status: :unprocessable_entity }
    end
  end
  # Queues AnalyzeInstagramProfileJob (action "analyze_profile") for the
  # current account's profile identified by params[:id]. Success and failure
  # are both reported as an HTML redirect, Turbo Stream, or JSON.
  def analyze
    profile = current_account.instagram_profiles.find(params[:id])
    enqueue_profile_job(profile: profile, action: "analyze_profile", job_class: AnalyzeInstagramProfileJob)

    respond_to do |fmt|
      fmt.html { redirect_back fallback_location: instagram_profile_path(profile), notice: "AI analysis queued." }
      fmt.turbo_stream do
        streams = queued_action_streams(profile: profile, message: "AI analysis queued for #{profile.username}.")
        render turbo_stream: streams
      end
      fmt.json { head :accepted }
    end
  rescue StandardError => error
    failure_message = "Unable to queue AI analysis: #{error.message}"
    respond_to do |fmt|
      fmt.html { redirect_back fallback_location: instagram_profile_path(params[:id]), alert: failure_message }
      fmt.turbo_stream do
        failure_locals = { kind: "alert", message: failure_message }
        render turbo_stream: turbo_stream.append("notifications", partial: "shared/notification", locals: failure_locals)
      end
      fmt.json { render json: { error: error.message }, status: :unprocessable_entity }
    end
  end
  # Queues CaptureInstagramProfilePostsJob (action "capture_profile_posts",
  # comments_limit 20) for the current account's profile identified by
  # params[:id]. Success and failure are both reported as an HTML redirect,
  # Turbo Stream, or JSON.
  def capture_posts
    profile = current_account.instagram_profiles.find(params[:id])
    capture_args = { comments_limit: 20 }
    enqueue_profile_job(
      profile: profile,
      action: "capture_profile_posts",
      job_class: CaptureInstagramProfilePostsJob,
      extra_job_args: capture_args
    )

    respond_to do |fmt|
      fmt.html { redirect_back fallback_location: instagram_profile_path(profile), notice: "Profile post capture queued." }
      fmt.turbo_stream do
        streams = queued_action_streams(profile: profile, message: "Profile post capture queued for #{profile.username}.")
        render turbo_stream: streams
      end
      fmt.json { head :accepted }
    end
  rescue StandardError => error
    failure_message = "Unable to queue post capture: #{error.message}"
    respond_to do |fmt|
      fmt.html { redirect_back fallback_location: instagram_profile_path(params[:id]), alert: failure_message }
      fmt.turbo_stream do
        failure_locals = { kind: "alert", message: failure_message }
        render turbo_stream: turbo_stream.append("notifications", partial: "shared/notification", locals: failure_locals)
      end
      fmt.json { render json: { error: error.message }, status: :unprocessable_entity }
    end
  end
  # Queues FetchInstagramProfileDetailsJob (action "fetch_profile_details")
  # for the current account's profile identified by params[:id]. Success and
  # failure are both reported as an HTML redirect, Turbo Stream, or JSON.
  def fetch_details
    profile = current_account.instagram_profiles.find(params[:id])
    enqueue_profile_job(profile: profile, action: "fetch_profile_details", job_class: FetchInstagramProfileDetailsJob)

    respond_to do |fmt|
      fmt.html { redirect_back fallback_location: instagram_profile_path(profile), notice: "Profile fetch queued." }
      fmt.turbo_stream do
        streams = queued_action_streams(profile: profile, message: "Profile fetch queued for #{profile.username}.")
        render turbo_stream: streams
      end
      fmt.json { head :accepted }
    end
  rescue StandardError => error
    failure_message = "Unable to queue fetch: #{error.message}"
    respond_to do |fmt|
      fmt.html { redirect_back fallback_location: instagram_profile_path(params[:id]), alert: failure_message }
      fmt.turbo_stream do
        failure_locals = { kind: "alert", message: failure_message }
        render turbo_stream: turbo_stream.append("notifications", partial: "shared/notification", locals: failure_locals)
      end
      fmt.json { render json: { error: error.message }, status: :unprocessable_entity }
    end
  end
  # Queues BuildInstagramProfileHistoryJob (action "build_history") for the
  # current account's profile identified by params[:id]. Success and failure
  # are both reported as an HTML redirect, Turbo Stream, or JSON.
  def build_history
    profile = current_account.instagram_profiles.find(params[:id])
    enqueue_profile_job(profile: profile, action: "build_history", job_class: BuildInstagramProfileHistoryJob)

    respond_to do |fmt|
      fmt.html { redirect_back fallback_location: instagram_profile_path(profile), notice: "History build queued." }
      fmt.turbo_stream do
        streams = queued_action_streams(profile: profile, message: "History build queued for #{profile.username}.")
        render turbo_stream: streams
      end
      fmt.json { head :accepted }
    end
  rescue StandardError => error
    failure_message = "Unable to queue history build: #{error.message}"
    respond_to do |fmt|
      fmt.html { redirect_back fallback_location: instagram_profile_path(params[:id]), alert: failure_message }
      fmt.turbo_stream do
        failure_locals = { kind: "alert", message: failure_message }
        render turbo_stream: turbo_stream.append("notifications", partial: "shared/notification", locals: failure_locals)
      end
      fmt.json { render json: { error: error.message }, status: :unprocessable_entity }
    end
  end
  # Queues VerifyInstagramMessageabilityJob (action "verify_messageability")
  # for the current account's profile identified by params[:id]. Success and
  # failure are both reported as an HTML redirect, Turbo Stream, or JSON.
  def verify_messageability
    profile = current_account.instagram_profiles.find(params[:id])
    enqueue_profile_job(profile: profile, action: "verify_messageability", job_class: VerifyInstagramMessageabilityJob)

    respond_to do |fmt|
      fmt.html { redirect_back fallback_location: instagram_profile_path(profile), notice: "Messageability check queued." }
      fmt.turbo_stream do
        streams = queued_action_streams(profile: profile, message: "Messageability check queued for #{profile.username}.")
        render turbo_stream: streams
      end
      fmt.json { head :accepted }
    end
  rescue StandardError => error
    failure_message = "Unable to queue messageability check: #{error.message}"
    respond_to do |fmt|
      fmt.html { redirect_back fallback_location: instagram_profile_path(params[:id]), alert: failure_message }
      fmt.turbo_stream do
        failure_locals = { kind: "alert", message: failure_message }
        render turbo_stream: turbo_stream.append("notifications", partial: "shared/notification", locals: failure_locals)
      end
      fmt.json { render json: { error: error.message }, status: :unprocessable_entity }
    end
  end
  # Queues DownloadInstagramProfileAvatarJob (action "sync_avatar") for the
  # current account's profile identified by params[:id]. Success and failure
  # are both reported as an HTML redirect, Turbo Stream, or JSON.
  def download_avatar
    profile = current_account.instagram_profiles.find(params[:id])
    enqueue_profile_job(profile: profile, action: "sync_avatar", job_class: DownloadInstagramProfileAvatarJob)

    respond_to do |fmt|
      fmt.html { redirect_back fallback_location: instagram_profile_path(profile), notice: "Avatar download queued." }
      fmt.turbo_stream do
        streams = queued_action_streams(profile: profile, message: "Avatar download queued for #{profile.username}.")
        render turbo_stream: streams
      end
      fmt.json { head :accepted }
    end
  rescue StandardError => error
    failure_message = "Unable to queue avatar download: #{error.message}"
    respond_to do |fmt|
      fmt.html { redirect_back fallback_location: instagram_profile_path(params[:id]), alert: failure_message }
      fmt.turbo_stream do
        failure_locals = { kind: "alert", message: failure_message }
        render turbo_stream: turbo_stream.append("notifications", partial: "shared/notification", locals: failure_locals)
      end
      fmt.json { render json: { error: error.message }, status: :unprocessable_entity }
    end
  end
  # Queues SyncInstagramProfileStoriesJob (up to 10 stories, no forced
  # re-analysis, no auto reply) for the requested profile and answers in the
  # requested format. Any StandardError is reported back to the client.
  def sync_stories
    target = current_account.instagram_profiles.find(params[:id])
    story_options = { max_stories: 10, force_analyze_all: false, auto_reply: false }
    enqueue_profile_job(
      profile: target,
      action: "sync_stories",
      job_class: SyncInstagramProfileStoriesJob,
      extra_job_args: story_options
    )

    respond_to do |format|
      format.html do
        redirect_back fallback_location: instagram_profile_path(target), notice: "Story sync queued."
      end
      format.turbo_stream do
        queued_notice = "Story sync queued for #{target.username}."
        render turbo_stream: queued_action_streams(profile: target, message: queued_notice)
      end
      format.json { head :accepted }
    end
  rescue StandardError => error
    failure_text = "Unable to queue story sync: #{error.message}"

    respond_to do |format|
      format.html do
        redirect_back fallback_location: instagram_profile_path(params[:id]), alert: failure_text
      end
      format.turbo_stream do
        alert_locals = { kind: "alert", message: failure_text }
        render turbo_stream: turbo_stream.append("notifications", partial: "shared/notification", locals: alert_locals)
      end
      format.json { render json: { error: error.message }, status: :unprocessable_entity }
    end
  end
  # Same as a regular story sync but with force_analyze_all enabled. The
  # action log is still recorded under the "sync_stories" action name.
  # Any StandardError is reported back to the client.
  def sync_stories_force
    target = current_account.instagram_profiles.find(params[:id])
    story_options = { max_stories: 10, force_analyze_all: true, auto_reply: false }
    enqueue_profile_job(
      profile: target,
      action: "sync_stories",
      job_class: SyncInstagramProfileStoriesJob,
      extra_job_args: story_options
    )

    respond_to do |format|
      format.html do
        redirect_back fallback_location: instagram_profile_path(target), notice: "Force story analysis queued."
      end
      format.turbo_stream do
        queued_notice = "Force story analysis queued for #{target.username}."
        render turbo_stream: queued_action_streams(profile: target, message: queued_notice)
      end
      format.json { head :accepted }
    end
  rescue StandardError => error
    failure_text = "Unable to queue force story analysis: #{error.message}"

    respond_to do |format|
      format.html do
        redirect_back fallback_location: instagram_profile_path(params[:id]), alert: failure_text
      end
      format.turbo_stream do
        alert_locals = { kind: "alert", message: failure_text }
        render turbo_stream: turbo_stream.append("notifications", partial: "shared/notification", locals: alert_locals)
      end
      format.json { render json: { error: error.message }, status: :unprocessable_entity }
    end
  end
  # Debug variant of the story sync: first removes this profile's existing
  # debug files, then queues SyncInstagramProfileStoriesJob under the
  # "sync_stories_debug" action. Any StandardError (including one raised by
  # the cleanup step) is reported back to the client.
  def sync_stories_debug
    target = current_account.instagram_profiles.find(params[:id])

    # Start from a clean slate so old snapshots are not mixed with new ones.
    cleanup_profile_debug_files(target.username)

    story_options = { max_stories: 10, force_analyze_all: false, auto_reply: false }
    enqueue_profile_job(
      profile: target,
      action: "sync_stories_debug",
      job_class: SyncInstagramProfileStoriesJob,
      extra_job_args: story_options
    )

    respond_to do |format|
      format.html do
        redirect_back fallback_location: instagram_profile_path(target),
                      notice: "Debug story sync queued. HTML snapshots will be captured."
      end
      format.turbo_stream do
        queued_notice = "Debug story sync queued for #{target.username}. HTML snapshots will be captured."
        render turbo_stream: queued_action_streams(profile: target, message: queued_notice)
      end
      format.json { head :accepted }
    end
  rescue StandardError => error
    failure_text = "Unable to queue debug story sync: #{error.message}"

    respond_to do |format|
      format.html do
        redirect_back fallback_location: instagram_profile_path(params[:id]), alert: failure_text
      end
      format.turbo_stream do
        alert_locals = { kind: "alert", message: failure_text }
        render turbo_stream: turbo_stream.append("notifications", partial: "shared/notification", locals: alert_locals)
      end
      format.json { render json: { error: error.message }, status: :unprocessable_entity }
    end
  end
  292. private
  # Builds the two Turbo Stream fragments rendered after a job is queued:
  # a notice appended to "notifications" and a replacement for
  # "action_history_section" fed with up to 100 logs from the profile's
  # `recent_first` scope.
  def queued_action_streams(profile:, message:)
    recent_logs = profile.instagram_profile_action_logs.recent_first.limit(100)

    notice_stream = turbo_stream.append(
      "notifications",
      partial: "shared/notification",
      locals: { kind: "notice", message: message }
    )
    history_stream = turbo_stream.replace(
      "action_history_section",
      partial: "instagram_profiles/action_history_section",
      locals: { action_logs: recent_logs }
    )

    [ notice_stream, history_stream ]
  end
  # Records a "queued" action log for the profile, enqueues the job, and
  # stores the resulting job id and queue name on the log.
  #
  # @param profile [Object] profile owning `instagram_profile_action_logs`
  # @param action [String] action name stored on the log
  # @param job_class [Class] job class responding to `perform_later`
  # @param extra_job_args [Hash] additional keyword arguments for the job
  # @raise [StandardError] re-raises any failure after marking the log failed
  def enqueue_profile_job(profile:, action:, job_class:, extra_job_args: {})
    log = profile.instagram_profile_action_logs.create!(
      instagram_account: current_account,
      action: action,
      status: "queued",
      trigger_source: "ui",
      occurred_at: Time.current,
      metadata: { requested_by: "InstagramProfileActionsController" }
    )

    begin
      job = job_class.perform_later(
        instagram_account_id: current_account.id,
        instagram_profile_id: profile.id,
        profile_action_log_id: log.id,
        **extra_job_args
      )
      # perform_later returns false when the job was not enqueued; fail with a
      # readable message instead of a NoMethodError on `false.job_id`.
      raise "#{job_class.name} was not enqueued" unless job

      log.update!(
        active_job_id: job.job_id,
        queue_name: job.queue_name
      )
      annotate_queue_worker_health!(log: log)
    rescue StandardError => e
      # Keep the log truthful, then let the calling action report the error.
      log.mark_failed!(error_message: "Queueing failed: #{e.message}")
      raise
    end
  end
  # Best-effort health annotation run right after enqueueing. When the Active
  # Job adapter is Sidekiq and the Sidekiq process set is empty, a warning is
  # merged into the log's metadata and a structured warning is emitted.
  # Any StandardError is swallowed and nil is returned.
  def annotate_queue_worker_health!(log:)
    adapter_name = Rails.application.config.active_job.queue_adapter.to_s
    return unless adapter_name == "sidekiq"

    # Loaded lazily: only needed when Sidekiq is the configured adapter.
    require "sidekiq/api"
    worker_processes = Sidekiq::ProcessSet.new.size
    return unless worker_processes.zero?

    existing_metadata = log.metadata.is_a?(Hash) ? log.metadata : {}
    warning_fields = {
      "queue_worker_warning" => "No active Sidekiq worker process detected when job was enqueued.",
      "queue_worker_warning_at" => Time.current.utc.iso8601(3)
    }
    log.update!(metadata: existing_metadata.merge(warning_fields))

    warning_payload = {
      action_log_id: log.id,
      action: log.action,
      active_job_id: log.active_job_id,
      queue_name: log.queue_name
    }
    Ops::StructuredLogger.warn(event: "jobs.enqueued_without_workers", payload: warning_payload)
  rescue StandardError
    nil
  end
  # Deletes previously captured story-debug files for one profile.
  #
  # Looks in tmp/story_debug_snapshots and tmp/story_reel_debug under
  # Rails.root and removes regular files named "<username>_*".
  #
  # The username is treated as a literal prefix: glob metacharacters are
  # escaped, and a blank name or one containing a path separator is ignored,
  # so an unusual value cannot match or reach files of other profiles.
  # Directories that happen to match are left alone, and a file that vanishes
  # between listing and deletion is not an error.
  #
  # @param username [String] profile username used as the file-name prefix
  # @return [void]
  def cleanup_profile_debug_files(username)
    prefix = username.to_s
    return if prefix.empty? || prefix.include?(File::SEPARATOR)

    escaped_prefix = prefix.gsub(/[\\*?\[\]{}]/) { |char| "\\#{char}" }
    pattern = "#{escaped_prefix}_*"

    debug_dirs = [
      Rails.root.join("tmp", "story_debug_snapshots"),
      Rails.root.join("tmp", "story_reel_debug")
    ]

    debug_dirs.each do |dir|
      next unless Dir.exist?(dir)

      # `base:` keeps glob characters in the directory path itself literal.
      Dir.glob(pattern, base: dir.to_s).each do |name|
        path = File.join(dir.to_s, name)
        next unless File.file?(path)

        begin
          File.delete(path)
        rescue Errno::ENOENT
          nil # already removed by someone else; nothing left to do
        end
      end
    end
  end
  371. end

app/controllers/instagram_profile_messages_controller.rb

0.0% lines covered

100.0% branches covered

35 relevant lines. 0 lines covered and 35 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Accepts outgoing messages for a profile and hands them to a background
  # job for delivery.
  class InstagramProfileMessagesController < ApplicationController
    before_action :require_current_account!

    # Persists a "queued" outgoing message, enqueues SendInstagramMessageJob
    # and responds via redirect (HTML) or Turbo Stream. Any StandardError,
    # including the blank-body guard below, is reported as "Send failed: ...".
    def create
      recipient = current_account.instagram_profiles.find(params[:instagram_profile_id])
      text = params.dig(:instagram_message, :body).to_s.strip
      raise "Message cannot be blank" if text.blank?

      queued_message = current_account.instagram_messages.create!(
        instagram_profile: recipient,
        direction: "outgoing",
        body: text,
        status: "queued"
      )
      SendInstagramMessageJob.perform_later(
        instagram_account_id: current_account.id,
        instagram_message_id: queued_message.id
      )

      respond_to do |format|
        format.html { redirect_to instagram_profile_path(recipient), notice: "Message queued for delivery." }
        format.turbo_stream do
          new_row = turbo_stream.prepend(
            "messages",
            partial: "instagram_messages/row",
            locals: { message: queued_message }
          )
          fresh_form = turbo_stream.replace(
            "message_form",
            partial: "instagram_messages/form",
            locals: { profile: recipient, message: recipient.instagram_messages.new }
          )
          render turbo_stream: [ new_row, fresh_form ]
        end
      end
    rescue StandardError => error
      failure_text = "Send failed: #{error.message}"

      respond_to do |format|
        format.html { redirect_to instagram_profile_path(params[:instagram_profile_id]), alert: failure_text }
        format.turbo_stream do
          alert_locals = { kind: "alert", message: failure_text }
          render turbo_stream: turbo_stream.append("notifications", partial: "shared/notification", locals: alert_locals)
        end
      end
    end
  end

app/controllers/instagram_profile_posts_controller.rb

0.0% lines covered

100.0% branches covered

249 relevant lines. 0 lines covered and 249 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Actions that queue AI analysis and comment posting for a profile's
  # captured posts. Every action answers HTML, Turbo Stream and JSON requests
  # and reports any StandardError back to the client instead of raising.
  class InstagramProfilePostsController < ApplicationController
    include ProfilePostPreviewSupport

    before_action :require_current_account!

    # Queues AnalyzeInstagramProfilePostJob for a single post unless its
    # pipeline metadata says an analysis is still running.
    def analyze
      profile = current_account.instagram_profiles.find(params[:instagram_profile_id])
      post = profile.instagram_profile_posts.find(params[:id])

      if analysis_in_progress?(post)
        message = "Post analysis already running for #{post.shortcode}."
        respond_to do |format|
          format.html { redirect_back fallback_location: instagram_profile_path(profile), notice: message }
          format.turbo_stream { render turbo_stream: captured_posts_refresh_streams(profile: profile, message: message) }
          format.json { render json: { message: message }, status: :accepted }
        end
        return
      end

      post.update!(ai_status: "pending", analyzed_at: nil)
      AnalyzeInstagramProfilePostJob.perform_later(
        instagram_account_id: current_account.id,
        instagram_profile_id: profile.id,
        instagram_profile_post_id: post.id,
        task_flags: analysis_task_flags
      )

      message = "Post analysis queued for #{post.shortcode}."
      respond_to do |format|
        format.html { redirect_back fallback_location: instagram_profile_path(profile), notice: message }
        format.turbo_stream { render turbo_stream: captured_posts_refresh_streams(profile: profile, message: message) }
        format.json { head :accepted }
      end
    rescue StandardError => e
      respond_with_post_action_failure("Unable to queue post analysis", e)
    end

    # Queues analysis for the next batch of 10 not-yet-analyzed posts,
    # ordered by taken_at and starting at the requested offset.
    def analyze_next_batch
      profile = current_account.instagram_profiles.find(params[:instagram_profile_id])
      offset = requested_batch_offset
      batch_size = 10

      # Resolve the batch once so the emptiness check, the action log, the job
      # arguments and the user-facing message all describe the same posts.
      post_ids = profile.instagram_profile_posts
        .where.not(ai_status: "analyzed")
        .or(profile.instagram_profile_posts.where(ai_status: nil))
        .order(:taken_at)
        .offset(offset)
        .limit(batch_size)
        .pluck(:id)

      if post_ids.empty?
        message = "No more posts to analyze."
        respond_to do |format|
          format.html { redirect_back fallback_location: instagram_profile_path(profile), notice: message }
          format.turbo_stream { render turbo_stream: posts_notification_stream(kind: "notice", message: message) }
          format.json { render json: { message: message }, status: :ok }
        end
        return
      end

      action_log = profile.instagram_profile_action_logs.create!(
        instagram_account: current_account,
        action: "analyze_profile_posts_batch",
        status: "queued",
        trigger_source: "ui",
        occurred_at: Time.current,
        metadata: {
          requested_by: "InstagramProfilePostsController",
          offset: offset,
          batch_size: batch_size,
          post_ids: post_ids,
          analysis_batch: "next_#{batch_size}_from_#{offset}"
        }
      )

      job = AnalyzeCapturedInstagramProfilePostsJob.perform_later(
        instagram_account_id: current_account.id,
        instagram_profile_id: profile.id,
        profile_action_log_id: action_log.id,
        post_ids: post_ids,
        refresh_profile_insights: false
      )
      action_log.update!(active_job_id: job.job_id, queue_name: job.queue_name)

      message = "Queued analysis for next #{post_ids.length} posts (posts #{offset + 1}-#{offset + post_ids.length})."
      respond_to do |format|
        format.html { redirect_back fallback_location: instagram_profile_path(profile), notice: message }
        format.turbo_stream { render turbo_stream: captured_posts_refresh_streams(profile: profile, message: message) }
        format.json { render json: { message: message, job_id: job.job_id }, status: :accepted }
      end
    rescue StandardError => e
      respond_with_post_action_failure("Unable to queue batch analysis", e)
    end

    # Queues PostInstagramProfileCommentJob to post the submitted comment on
    # the given post. Requires a non-blank comment and a "media_id" entry in
    # the post's metadata.
    def forward_comment
      profile = current_account.instagram_profiles.find(params[:instagram_profile_id])
      post = profile.instagram_profile_posts.find(params[:id])

      comment_text = params[:comment].to_s.strip
      raise "Comment cannot be blank" if comment_text.blank?

      media_id = post.metadata.is_a?(Hash) ? post.metadata["media_id"].to_s.strip : ""
      raise "Media id missing for this post. Re-run profile analysis to refresh post metadata." if media_id.blank?

      action_log = profile.instagram_profile_action_logs.create!(
        instagram_account: current_account,
        action: "post_comment",
        status: "queued",
        trigger_source: "ui",
        occurred_at: Time.current,
        metadata: {
          requested_by: "InstagramProfilePostsController",
          post_shortcode: post.shortcode,
          media_id: media_id,
          comment_text: comment_text
        }
      )

      job = PostInstagramProfileCommentJob.perform_later(
        instagram_account_id: current_account.id,
        instagram_profile_id: profile.id,
        instagram_profile_post_id: post.id,
        comment_text: comment_text,
        media_id: media_id,
        profile_action_log_id: action_log.id
      )
      action_log.update!(active_job_id: job.job_id, queue_name: job.queue_name)

      message = "Comment queued for #{post.shortcode}."
      respond_to do |format|
        format.html { redirect_back fallback_location: instagram_profile_path(profile), notice: message }
        format.turbo_stream do
          action_logs = profile.instagram_profile_action_logs.recent_first.limit(100)
          render turbo_stream: [
            posts_notification_stream(kind: "notice", message: message),
            turbo_stream.replace(
              "action_history_section",
              partial: "instagram_profiles/action_history_section",
              locals: { action_logs: action_logs }
            )
          ]
        end
        format.json { head :accepted }
      end
    rescue StandardError => e
      respond_with_post_action_failure("Unable to queue comment", e)
    end

    private

    # Casts a request parameter to a boolean, using `default` when it is absent.
    def boolean_param(value, default:)
      return default if value.nil?

      ActiveModel::Type::Boolean.new.cast(value)
    end

    # True when the post's "ai_pipeline" metadata is marked "running" and at
    # least one required step has not reached a terminal status. Malformed
    # metadata (or any error while reading it) counts as "not in progress".
    def analysis_in_progress?(post)
      metadata = post.metadata
      return false unless metadata.is_a?(Hash)

      pipeline = metadata["ai_pipeline"]
      return false unless pipeline.is_a?(Hash)
      return false unless pipeline["status"].to_s == "running"

      required_steps = Array(pipeline["required_steps"]).map(&:to_s)
      return false if required_steps.empty?

      terminal_statuses = Ai::PostAnalysisPipelineState::TERMINAL_STATUSES
      required_steps.any? do |step|
        !terminal_statuses.include?(pipeline.dig("steps", step, "status").to_s)
      end
    rescue StandardError
      false
    end

    # Offset for #analyze_next_batch: 50 when the parameter is absent or
    # blank, otherwise the submitted integer clamped to zero or above.
    # NOTE(review): the previous `params[:offset].to_i || 50` could never
    # reach its fallback (to_i always returns an Integer), so a missing
    # parameter silently meant 0. Confirm 50 is the intended default.
    def requested_batch_offset
      raw_offset = params[:offset]
      return 50 if raw_offset.blank?

      [ raw_offset.to_i, 0 ].max
    end

    # Per-request switches forwarded to AnalyzeInstagramProfilePostJob.
    def analysis_task_flags
      {
        analyze_visual: boolean_param(params[:analyze_visual], default: true),
        analyze_faces: boolean_param(params[:analyze_faces], default: true),
        run_ocr: boolean_param(params[:run_ocr], default: true),
        run_video: boolean_param(params[:run_video], default: true),
        run_metadata: boolean_param(params[:run_metadata], default: true),
        generate_comments: boolean_param(params[:generate_comments], default: true),
        enforce_comment_evidence_policy: boolean_param(params[:enforce_comment_evidence_policy], default: false),
        retry_on_incomplete_profile: boolean_param(params[:retry_on_incomplete_profile], default: false)
      }
    end

    # Turbo Stream that appends one notification of the given kind.
    def posts_notification_stream(kind:, message:)
      turbo_stream.append(
        "notifications",
        partial: "shared/notification",
        locals: { kind: kind, message: message }
      )
    end

    # Notice plus a refreshed captured-posts section (up to 100 posts from the
    # `recent_first` scope, with associations preloaded for the partial).
    def captured_posts_refresh_streams(profile:, message:)
      profile_posts = profile.instagram_profile_posts
        .includes(:instagram_profile_post_comments, :ai_analyses, { instagram_post_faces: :instagram_story_person }, media_attachment: :blob, preview_image_attachment: :blob)
        .recent_first
        .limit(100)

      [
        posts_notification_stream(kind: "notice", message: message),
        turbo_stream.replace(
          "captured_profile_posts_section",
          partial: "instagram_profiles/captured_posts_section",
          locals: { profile: profile, profile_posts: profile_posts }
        )
      ]
    end

    # Shared failure response: alert redirect (HTML), alert notification
    # (Turbo Stream) or a 422 JSON error body.
    def respond_with_post_action_failure(prefix, error)
      failure_text = "#{prefix}: #{error.message}"

      respond_to do |format|
        format.html do
          redirect_back fallback_location: instagram_profile_path(params[:instagram_profile_id]), alert: failure_text
        end
        format.turbo_stream { render turbo_stream: posts_notification_stream(kind: "alert", message: failure_text) }
        format.json { render json: { error: error.message }, status: :unprocessable_entity }
      end
    end
  end

app/controllers/instagram_profiles_controller.rb

0.0% lines covered

100.0% branches covered

171 relevant lines. 0 lines covered and 171 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Profile listing, profile detail page, lazily loaded profile sections,
  # the events table feed and tag updates.
  class InstagramProfilesController < ApplicationController
    include ProfilePostPreviewSupport

    before_action :require_current_account!
    before_action :set_account_and_profile!, only: %i[
      show
      events
      tags
      captured_posts_section
      downloaded_stories_section
      messages_section
      action_history_section
      events_table_section
    ]

    # Lists the account's profiles (HTML) or returns the table payload (JSON).
    def index
      @account = current_account
      listing = InstagramProfiles::ProfilesIndexQuery.new(account: @account, params: params).call

      @q = listing.q
      @filter = listing.filter
      @page = listing.page
      @per_page = listing.per_page
      @total = listing.total
      @pages = listing.pages
      @profiles = listing.profiles
      @latest_sync_run = @account.sync_runs.order(created_at: :desc).first

      @counts = {
        total: @account.instagram_profiles.count,
        mutuals: @account.instagram_profiles.where(following: true, follows_you: true).count,
        following: @account.instagram_profiles.where(following: true).count,
        followers: @account.instagram_profiles.where(follows_you: true).count
      }

      respond_to do |format|
        format.html
        format.json do
          payload_builder = InstagramProfiles::TabulatorProfilesPayloadBuilder.new(
            profiles: @profiles,
            total: @total,
            pages: @pages,
            view_context: view_context
          )
          render json: payload_builder.call
        end
      end
    end

    # Exposes the values of the show snapshot to the view, one instance
    # variable per snapshot key, plus a blank message for the message form.
    def show
      snapshot = InstagramProfiles::ShowSnapshotService.new(account: @account, profile: @profile).call

      %i[
        profile_posts_total_count
        deleted_posts_count
        active_posts_count
        analyzed_posts_count
        pending_posts_count
        messages_count
        action_logs_count
        latest_analysis
        latest_story_intelligence_event
        available_tags
        history_build_state
        history_ready
        mutual_profiles
      ].each do |key|
        instance_variable_set("@#{key}", snapshot[key])
      end

      @new_message = @profile.instagram_messages.new
    end

    # Renders the captured-posts section with up to 40 posts.
    def captured_posts_section
      posts_scope = @profile.instagram_profile_posts.includes(
        :instagram_profile_post_comments,
        :ai_analyses,
        { instagram_post_faces: :instagram_story_person },
        media_attachment: :blob,
        preview_image_attachment: :blob
      )

      render_profile_frame(
        frame_id: "profile_captured_posts_#{@profile.id}",
        partial: "instagram_profiles/captured_posts_section",
        locals: { profile: @profile, profile_posts: posts_scope.recent_first.limit(40) }
      )
    end

    # Renders the downloaded-stories section with up to 18 story archive
    # events that have media attached, newest detection first.
    def downloaded_stories_section
      story_events = @profile.instagram_profile_events
        .joins(:media_attachment)
        .with_attached_media
        .with_attached_preview_image
        .where(kind: InstagramProfileEvent::STORY_ARCHIVE_EVENT_KINDS)
        .order(detected_at: :desc, id: :desc)
        .limit(18)

      render_profile_frame(
        frame_id: "profile_downloaded_stories_#{@profile.id}",
        partial: "instagram_profiles/downloaded_stories_section",
        locals: { profile: @profile, downloaded_story_events: story_events }
      )
    end

    # Renders the messages section with up to 120 messages.
    def messages_section
      render_profile_frame(
        frame_id: "profile_messages_#{@profile.id}",
        partial: "instagram_profiles/messages_section",
        locals: { messages: @profile.instagram_messages.recent_first.limit(120) }
      )
    end

    # Renders the action-history section with up to 100 action logs.
    def action_history_section
      render_profile_frame(
        frame_id: "profile_actions_#{@profile.id}",
        partial: "instagram_profiles/action_history_section",
        locals: { action_logs: @profile.instagram_profile_action_logs.recent_first.limit(100) }
      )
    end

    # Renders the (data-less) shell of the events table section.
    def events_table_section
      render_profile_frame(
        frame_id: "profile_events_table_#{@profile.id}",
        partial: "instagram_profiles/events_table_section",
        locals: { profile: @profile }
      )
    end

    # JSON feed for the events table.
    def events
      listing = InstagramProfiles::EventsQuery.new(profile: @profile, params: params).call
      payload_builder = InstagramProfiles::TabulatorEventsPayloadBuilder.new(
        events: listing.events,
        total: listing.total,
        pages: listing.pages,
        view_context: view_context
      )

      render json: payload_builder.call
    end

    # Replaces the profile's tags with the union of the selected tag names and
    # the free-text list (comma or newline separated), normalised to
    # stripped, lower-cased, non-blank, unique names.
    def tags
      normalize = ->(raw) { raw.to_s.strip.downcase }

      selected_names = Array(params[:tag_names]).map(&normalize).reject(&:blank?)
      typed_names = params[:custom_tags].to_s.split(/[,\n]/).map(&normalize).reject(&:blank?)
      wanted_names = (selected_names + typed_names).uniq

      @profile.profile_tags = wanted_names.map { |name| ProfileTag.find_or_create_by!(name: name) }
      @profile.save!

      respond_to do |format|
        format.html { redirect_to instagram_profile_path(@profile), notice: "Tags updated." }
        format.turbo_stream do
          notice_stream = turbo_stream.append(
            "notifications",
            partial: "shared/notification",
            locals: { kind: "notice", message: "Tags updated." }
          )
          tags_stream = turbo_stream.replace(
            "profile_tags_section",
            partial: "instagram_profiles/profile_tags_section",
            locals: {
              profile: @profile,
              available_tags: InstagramProfiles::ShowSnapshotService::AVAILABLE_TAGS
            }
          )
          render turbo_stream: [ notice_stream, tags_stream ]
        end
      end
    rescue StandardError => error
      failure_text = "Unable to update tags: #{error.message}"

      respond_to do |format|
        format.html { redirect_to instagram_profile_path(params[:id]), alert: failure_text }
        format.turbo_stream do
          alert_locals = { kind: "alert", message: failure_text }
          render turbo_stream: turbo_stream.append("notifications", partial: "shared/notification", locals: alert_locals)
        end
      end
    end

    private

    # Loads the current account and the profile identified by params[:id]
    # from that account's profiles.
    def set_account_and_profile!
      @account = current_account
      @profile = @account.instagram_profiles.find(params[:id])
    end

    # Renders a partial on its own; for Turbo Frame requests the markup is
    # wrapped in a turbo-frame tag with the given id.
    def render_profile_frame(frame_id:, partial:, locals:)
      markup = render_to_string(partial: partial, locals: locals)

      unless turbo_frame_request?
        render html: markup.html_safe
        return
      end

      render html: view_context.turbo_frame_tag(frame_id) { markup.html_safe }
    end
  end

app/controllers/instagram_story_people_controller.rb

0.0% lines covered

100.0% branches covered

149 relevant lines. 0 lines covered and 149 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. class InstagramStoryPeopleController < ApplicationController
  2. before_action :require_current_account!
  3. before_action :set_profile
  4. before_action :set_person, only: %i[
  5. show
  6. confirm
  7. mark_incorrect
  8. link_profile_owner
  9. merge
  10. separate_face
  11. ]
  12. before_action :set_feedback_service, only: %i[
  13. confirm
  14. mark_incorrect
  15. link_profile_owner
  16. merge
  17. separate_face
  18. ]
  19. def show
  20. @post_groups = grouped_post_faces(@person)
  21. @story_groups = grouped_story_faces(@person)
  22. @post_face_count = @person.instagram_post_faces.count
  23. @story_face_count = @person.instagram_story_faces.count
  24. @total_appearances = @post_face_count + @story_face_count
  25. @merge_candidates = @profile.instagram_story_people.recently_seen.where.not(id: @person.id).limit(80)
  26. end
  27. def confirm
  28. @feedback_service.confirm_person!(
  29. person: @person,
  30. label: params[:label],
  31. real_person_status: params[:real_person_status]
  32. )
  33. redirect_to person_path(@person), notice: "Identity confirmed for #{@person.display_label}."
  34. rescue StandardError => e
  35. redirect_to person_path(@person), alert: "Unable to confirm identity: #{e.message}"
  36. end
  37. def mark_incorrect
  38. @feedback_service.mark_incorrect!(
  39. person: @person,
  40. reason: params[:reason]
  41. )
  42. redirect_to person_path(@person), notice: "#{@person.display_label} was marked as incorrect."
  43. rescue StandardError => e
  44. redirect_to person_path(@person), alert: "Unable to mark person as incorrect: #{e.message}"
  45. end
  46. def link_profile_owner
  47. @feedback_service.link_profile_owner!(person: @person)
  48. redirect_to person_path(@person), notice: "#{@person.display_label} is now linked as the profile owner."
  49. rescue StandardError => e
  50. redirect_to person_path(@person), alert: "Unable to set profile owner link: #{e.message}"
  51. end
  52. def merge
  53. target_person = @profile.instagram_story_people.find(params[:target_person_id])
  54. @feedback_service.merge_people!(source_person: @person, target_person: target_person)
  55. redirect_to person_path(target_person), notice: "Merged #{@person.display_label} into #{target_person.display_label}."
  56. rescue StandardError => e
  57. redirect_to person_path(@person), alert: "Unable to merge identities: #{e.message}"
  58. end
  59. def separate_face
  60. face = find_face!(params[:face_type], params[:face_id])
  61. new_person = @feedback_service.separate_face!(person: @person, face: face)
  62. redirect_to person_path(new_person), notice: "Created #{new_person.display_label} from a separated detection."
  63. rescue StandardError => e
  64. redirect_to person_path(@person), alert: "Unable to separate detection: #{e.message}"
  65. end
  66. private
  67. def set_profile
  68. @profile = current_account.instagram_profiles.find(params[:instagram_profile_id])
  69. end
  70. def set_person
  71. @person = @profile.instagram_story_people.find(params[:id])
  72. end
  73. def set_feedback_service
  74. @feedback_service = PersonIdentityFeedbackService.new
  75. end
  76. def person_path(person)
  77. instagram_profile_instagram_story_person_path(@profile, person)
  78. end
  79. def find_face!(face_type, face_id)
  80. token = face_type.to_s.strip
  81. id = face_id.to_i
  82. raise ActiveRecord::RecordNotFound, "Face id missing" unless id.positive?
  83. if token == "story"
  84. InstagramStoryFace
  85. .joins(:instagram_story)
  86. .where(instagram_stories: { instagram_profile_id: @profile.id })
  87. .find(id)
  88. else
  89. InstagramPostFace
  90. .joins(:instagram_profile_post)
  91. .where(instagram_profile_posts: { instagram_profile_id: @profile.id })
  92. .find(id)
  93. end
  94. end
  95. def grouped_post_faces(person)
  96. faces = person.instagram_post_faces
  97. .includes(instagram_profile_post: [ media_attachment: :blob, preview_image_attachment: :blob ])
  98. .order(created_at: :desc)
  99. .limit(240)
  100. .to_a
  101. grouped_faces(
  102. faces: faces,
  103. owner_key: :instagram_profile_post_id,
  104. count_rows: InstagramPostFace
  105. .where(instagram_profile_post_id: faces.map(&:instagram_profile_post_id).uniq)
  106. .where.not(instagram_story_person_id: nil)
  107. .pluck(:instagram_profile_post_id, :instagram_story_person_id)
  108. )
  109. end
  # Builds per-story summaries of the faces attributed to +person+.
  #
  # Considers the person's 240 most recently created story faces, then asks
  # grouped_faces to group them by story. The people count per story is derived
  # from every identified face (non-nil instagram_story_person_id) on those stories.
  def grouped_story_faces(person)
    recent_faces =
      person.instagram_story_faces
        .includes(instagram_story: [ media_attachment: :blob ])
        .order(created_at: :desc)
        .limit(240)
        .to_a

    story_ids = recent_faces.map(&:instagram_story_id).uniq
    identified_pairs =
      InstagramStoryFace
        .where(instagram_story_id: story_ids)
        .where.not(instagram_story_person_id: nil)
        .pluck(:instagram_story_id, :instagram_story_person_id)

    grouped_faces(
      faces: recent_faces,
      owner_key: :instagram_story_id,
      count_rows: identified_pairs
    )
  end
  # Groups face records by their owning post/story and summarizes each group.
  #
  # faces      - face records responding to +owner_key+ and to the association
  #              named by +owner_key+ without its "_id" suffix.
  # owner_key  - Symbol of the foreign-key reader, e.g. :instagram_story_id.
  # count_rows - Array of [owner_id, person_id] pairs used to count the
  #              distinct people seen on each owner.
  #
  # Returns an Array of Hashes (:owner, :faces, :face_count_for_person,
  # :total_people, :scope, :occurred_at) ordered by occurred_at and then owner
  # id, both descending. Owners without taken_at sort as Time.at(0). Groups
  # whose owner record is missing are left out.
  def grouped_faces(faces:, owner_key:, count_rows:)
    return [] if faces.empty?

    association_name = owner_key.to_s.sub(/_id\z/, "")
    distinct_people = count_rows
      .group_by(&:first)
      .transform_values { |pairs| pairs.map(&:last).uniq.size }

    summaries = faces.group_by(&owner_key).each_with_object([]) do |(owner_id, owner_faces), rows|
      owner = owner_faces.first.public_send(association_name)
      next unless owner

      people_total = distinct_people[owner_id].to_i
      rows << {
        owner: owner,
        faces: owner_faces.first(8),
        face_count_for_person: owner_faces.length,
        total_people: people_total,
        scope: people_total > 1 ? "multiple_people" : "single_person",
        occurred_at: owner.respond_to?(:taken_at) ? owner.taken_at : nil
      }
    end

    summaries
      .sort_by { |row| [ row[:occurred_at] || Time.at(0), row[:owner].id ] }
      .reverse
  end
  149. end

app/controllers/workspaces_controller.rb

0.0% lines covered

100.0% branches covered

31 relevant lines. 0 lines covered and 31 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Serves the workspace actions queue: a full page (+actions+) and a partial
  # used to refresh the queue section (+actions_feed+).
  class WorkspacesController < ApplicationController
    include ProfilePostPreviewSupport

    before_action :require_current_account!

    # Queue size used when the request carries no :limit parameter.
    DEFAULT_QUEUE_LIMIT = 40

    # Renders the actions page for the resolved account.
    def actions
      @account = resolved_account
      @queue_result = load_queue_result(account: @account)
    end

    # Renders only the queue section. Any StandardError is reported as a short
    # HTML paragraph with status 422 instead of propagating.
    def actions_feed
      account = resolved_account
      queue_result = load_queue_result(account: account)
      render(
        partial: "workspaces/actions_queue_section",
        locals: { account: account, queue_result: queue_result }
      )
    rescue StandardError => e
      message = "Unable to refresh workspace queue: #{e.message}"
      render html: view_context.content_tag(:p, message, class: "meta"), status: :unprocessable_entity
    end

    private

    # Picks the account to show: the one named by params[:instagram_account_id]
    # when it is a positive id different from the current account, otherwise
    # the current account. Lookup failures fall back to the current account.
    def resolved_account
      account_id = params[:instagram_account_id].to_i
      if account_id.positive? && current_account.id != account_id
        current_account.class.find(account_id)
      else
        current_account
      end
    rescue StandardError
      current_account
    end

    # Fetches the todo queue for +account+ through
    # Workspace::ActionsTodoQueueService, passing enqueue_processing: true.
    def load_queue_result(account:)
      service = Workspace::ActionsTodoQueueService.new(
        account: account,
        limit: params.fetch(:limit, DEFAULT_QUEUE_LIMIT),
        enqueue_processing: true
      )
      service.fetch!
    end
  end

app/helpers/ai_dashboard_helper.rb

22.22% lines covered

0.0% branches covered

9 relevant lines. 2 lines covered and 7 lines missed.
6 total branches, 0 branches covered and 6 branches missed.
    
  # View helpers for the AI dashboard.
  module AiDashboardHelper
    # Returns the name of the default test for an AI service.
    #
    # service - anything responding to to_s ("vision", :face, ...).
    #
    # Unknown services map to 'basic'.
    def get_default_test_for_service(service)
      default_tests = {
        'vision' => 'labels',
        'face' => 'detection',
        'ocr' => 'text_extraction',
        'whisper' => 'transcription',
        'video' => 'analysis'
      }
      default_tests.fetch(service.to_s, 'basic')
    end
  end

app/helpers/application_helper.rb

14.55% lines covered

0.0% branches covered

55 relevant lines. 8 lines covered and 47 lines missed.
39 total branches, 0 branches covered and 39 branches missed.
    
  # Application-wide view helpers: relative timestamps, top navigation links
  # and the mapping from controllers to navigation sections.
  module ApplicationHelper
    # Renders a <time> element whose text is the distance from now
    # ("... ago" for past or present values, "in ..." for future ones) and whose
    # title attribute carries the absolute timestamp.
    #
    # value - a time-like object responding to in_time_zone.
    # blank - returned as-is when +value+ is blank.
    def relative_time_with_tooltip(value, blank: "-")
      return blank if value.blank?

      moment = value.in_time_zone
      in_past = moment <= Time.current
      distance = time_ago_in_words(moment)
      label = in_past ? "#{distance} ago" : "in #{distance}"

      content_tag(
        :time,
        label,
        datetime: moment.iso8601,
        title: moment.strftime("%Y-%m-%d %H:%M:%S %Z")
      )
    end

    # link_to wrapper for the top navigation. Adds the "nav-link" class and,
    # when +section+ is the active one, the "active" class and
    # aria-current="page". With a block, the first positional argument is the
    # path and the block supplies the link content.
    def top_nav_link_to(name = nil, path = nil, section:, **options, &block)
      if block_given?
        path, name = name, capture(&block)
      end

      is_active = top_nav_active?(section)
      css_classes = [ "nav-link", options.delete(:class) ]
      css_classes.push("active") if is_active

      aria = (options.delete(:aria) || {}).dup
      aria[:current] = "page" if is_active

      link_to name, path, **options.merge(class: css_classes.compact.join(" "), aria: aria)
    end

    # Returns the name of the default test for an AI service; unknown services
    # map to 'basic'.
    def get_default_test_for_service(service)
      default_tests = {
        'vision' => 'labels',
        'face' => 'detection',
        'ocr' => 'text_extraction',
        'whisper' => 'transcription',
        'video' => 'analysis'
      }
      default_tests.fetch(service.to_s, 'basic')
    end

    # Alias for the AI dashboard index route.
    def ai_dashboard_path
      ai_dashboard_index_path
    end

    # Maps the current controller (and, for admin/background_jobs, the action
    # or request path) to a navigation section symbol. Returns nil when no
    # section applies.
    def current_section
      case controller_path
      when "instagram_accounts" then :accounts
      when *%w[
        instagram_profiles
        instagram_profile_actions
        instagram_profile_posts
        instagram_profile_messages
        instagram_story_people
      ]
        :profiles
      when "instagram_posts" then :posts
      when "workspaces" then :workspace_actions
      when "ai_dashboard" then :ai_dashboard
      when "admin/background_jobs"
        if action_name == "dashboard" || request.path.start_with?("/admin/jobs")
          :jobs
        elsif %w[failures failure].include?(action_name)
          :failures
        end
      when "admin/issues" then :issues
      when "admin/storage_ingestions" then :storage
      end
    end

    private

    # True when +section+ is the navigation section of the current request.
    # Unknown sections are never active.
    def top_nav_active?(section)
      case section
      when :accounts then controller_path == "instagram_accounts"
      when :profiles
        %w[
          instagram_profiles
          instagram_profile_actions
          instagram_profile_posts
          instagram_profile_messages
          instagram_story_people
        ].include?(controller_path)
      when :posts then controller_path == "instagram_posts"
      when :workspace_actions then controller_path == "workspaces"
      when :ai_dashboard then controller_path == "ai_dashboard"
      when :jobs
        # The path check runs first; the controller/action pair is only
        # consulted when the path does not already match.
        request.path.start_with?("/admin/jobs") ||
          (controller_path == "admin/background_jobs" && action_name == "dashboard")
      when :failures
        controller_path == "admin/background_jobs" && %w[failures failure].include?(action_name)
      when :issues then controller_path == "admin/issues"
      when :storage then controller_path == "admin/storage_ingestions"
      else false
      end
    end
  end

app/helpers/dashboard_helper.rb

100.0% lines covered

100.0% branches covered

1 relevant lines. 1 lines covered and 0 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Placeholder helper module; it defines no helper methods.
  1. 1 module DashboardHelper
  2. end

app/helpers/instagram_accounts_helper.rb

100.0% lines covered

100.0% branches covered

1 relevant lines. 1 lines covered and 0 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Placeholder helper module; it defines no helper methods.
  1. 1 module InstagramAccountsHelper
  2. end

app/helpers/messages_helper.rb

100.0% lines covered

100.0% branches covered

1 relevant lines. 1 lines covered and 0 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Placeholder helper module; it defines no helper methods.
  1. 1 module MessagesHelper
  2. end

app/helpers/syncs_helper.rb

100.0% lines covered

100.0% branches covered

1 relevant lines. 1 lines covered and 0 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Placeholder helper module; it defines no helper methods.
  1. 1 module SyncsHelper
  2. end

app/jobs/analyze_captured_instagram_profile_posts_job.rb

0.0% lines covered

100.0% branches covered

174 relevant lines. 0 lines covered and 174 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. class AnalyzeCapturedInstagramProfilePostsJob < ApplicationJob
  2. queue_as :ai
  3. DEFAULT_BATCH_SIZE = 6
  4. MAX_BATCH_SIZE = 20
  # Analyzes a profile's captured posts in batches, tracking progress on a
  # profile action log.
  #
  # instagram_account_id:     id of the InstagramAccount owning the profile.
  # instagram_profile_id:     id of the profile, looked up through the account.
  # profile_action_log_id:    existing log to continue; a new one is created when absent.
  # post_ids:                 explicit post ids; when none are usable, pending posts are selected.
  # batch_size:               posts handled per run, clamped to 1..MAX_BATCH_SIZE.
  # refresh_profile_insights: when truthy, enqueue AnalyzeInstagramProfileJob after the
  #                           last batch if at least one post was analyzed overall.
  # total_candidates:         overall candidate count carried across re-enqueued runs.
  #
  # Each run processes one batch inline and, if ids remain, re-enqueues itself
  # with the remainder. Any StandardError escaping the method marks the log
  # failed and is re-raised.
  5. def perform(
  6. instagram_account_id:,
  7. instagram_profile_id:,
  8. profile_action_log_id: nil,
  9. post_ids: nil,
  10. batch_size: DEFAULT_BATCH_SIZE,
  11. refresh_profile_insights: true,
  12. total_candidates: nil
  13. )
  14. account = InstagramAccount.find(instagram_account_id)
  15. profile = account.instagram_profiles.find(instagram_profile_id)
  16. action_log = find_or_create_action_log(
  17. account: account,
  18. profile: profile,
  19. profile_action_log_id: profile_action_log_id
  20. )
  21. ids = normalize_post_ids(profile: profile, post_ids: post_ids)
  # Nothing to analyze: close the log as succeeded (flagged skipped) and stop.
  22. if ids.empty?
  23. action_log.mark_succeeded!(
  24. extra_metadata: { skipped: true, reason: "no_candidate_posts", queue_name: queue_name, active_job_id: job_id },
  25. log_text: "No candidate posts required analysis."
  26. )
  27. return
  28. end
  # The scan policy can veto post analysis for the whole profile; in that case
  # every candidate post is marked as skipped and the log records the reason.
  29. policy_decision = Instagram::ProfileScanPolicy.new(profile: profile).decision
  30. if policy_decision[:skip_post_analysis]
  31. mark_posts_as_policy_skipped!(profile: profile, ids: ids, decision: policy_decision)
  32. action_log.mark_succeeded!(
  33. extra_metadata: {
  34. skipped: true,
  35. reason: "profile_scan_policy_blocked",
  36. skip_reason_code: policy_decision[:reason_code],
  37. skip_reason: policy_decision[:reason],
  38. followers_count: policy_decision[:followers_count],
  39. max_followers: policy_decision[:max_followers],
  40. skipped_posts_count: ids.length
  41. },
  42. log_text: "Skipped post analysis: #{policy_decision[:reason]}"
  43. )
  44. return
  45. end
  # Split the ids into the batch handled now and the remainder for a later run.
  46. batch_size_i = batch_size.to_i.clamp(1, MAX_BATCH_SIZE)
  # A non-positive or missing total falls back to the number of ids in this run.
  47. total_candidates_i = total_candidates.to_i.positive? ? total_candidates.to_i : ids.length
  48. current_batch_ids = ids.first(batch_size_i)
  49. remaining_ids = ids.drop(batch_size_i)
  50. action_log.mark_running!(
  51. extra_metadata: {
  52. queue_name: queue_name,
  53. active_job_id: job_id,
  54. batch_size: batch_size_i,
  55. current_batch_count: current_batch_ids.length,
  56. remaining_count: remaining_ids.length
  57. }
  58. )
  59. analyzed_now = 0
  60. skipped_now = 0
  61. failed_now = []
  62. current_batch_ids.each do |post_id|
  63. post = profile.instagram_profile_posts.find_by(id: post_id)
  # Ids that no longer resolve to a post are silently passed over; they are
  # counted as processed below but not as analyzed, skipped or failed.
  64. next unless post
  65. if post.ai_status.to_s == "analyzed" && post.analyzed_at.present?
  66. skipped_now += 1
  67. next
  68. end
  # The per-post job runs inline in this process with comment generation disabled.
  # NOTE(review): if AnalyzeInstagramProfilePostJob handles its own errors
  # (e.g. retry_on/discard_on), perform_now may return without raising and the
  # post would still be counted as analyzed — confirm against that job.
  69. AnalyzeInstagramProfilePostJob.perform_now(
  70. instagram_account_id: account.id,
  71. instagram_profile_id: profile.id,
  72. instagram_profile_post_id: post.id,
  73. pipeline_mode: "inline",
  74. task_flags: {
  75. generate_comments: false,
  76. enforce_comment_evidence_policy: false,
  77. retry_on_incomplete_profile: false
  78. }
  79. )
  80. analyzed_now += 1
  # A failure on one post is recorded (message truncated to 220 bytes) and does
  # not abort the rest of the batch.
  81. rescue StandardError => e
  82. failed_now << {
  83. post_id: post_id,
  84. shortcode: post&.shortcode.to_s.presence,
  85. error_class: e.class.name,
  86. error_message: e.message.to_s.byteslice(0, 220)
  87. }.compact
  88. next
  89. end
  # Fold this batch's counters into the state accumulated on the action log.
  90. state = merged_queue_state(
  91. action_log: action_log,
  92. total_candidates: total_candidates_i,
  93. processed_increment: current_batch_ids.length,
  94. analyzed_increment: analyzed_now,
  95. skipped_increment: skipped_now,
  96. failed_rows: failed_now,
  97. remaining_count: remaining_ids.length
  98. )
  # More ids left: hand them to a new run of this job, reusing the same action
  # log, and keep the log in the running state pointing at that job.
  99. if remaining_ids.any?
  100. next_job = self.class.perform_later(
  101. instagram_account_id: account.id,
  102. instagram_profile_id: profile.id,
  103. profile_action_log_id: action_log.id,
  104. post_ids: remaining_ids,
  105. batch_size: batch_size_i,
  106. refresh_profile_insights: refresh_profile_insights,
  107. total_candidates: total_candidates_i
  108. )
  109. state["next_job_id"] = next_job.job_id
  110. action_log.mark_running!(extra_metadata: { analysis_queue_state: state, active_job_id: next_job.job_id, queue_name: next_job.queue_name })
  111. return
  112. end
  # Last batch: optionally refresh profile-level insights, but only when the
  # accumulated state shows at least one analyzed post.
  113. refresh_job = nil
  114. if ActiveModel::Type::Boolean.new.cast(refresh_profile_insights) && state["analyzed_count"].to_i.positive?
  115. refresh_job = AnalyzeInstagramProfileJob.perform_later(
  116. instagram_account_id: account.id,
  117. instagram_profile_id: profile.id
  118. )
  119. end
  120. action_log.mark_succeeded!(
  121. extra_metadata: {
  122. analysis_queue_state: state,
  123. refresh_profile_insights: ActiveModel::Type::Boolean.new.cast(refresh_profile_insights),
  124. profile_insights_refresh_job_id: refresh_job&.job_id
  125. },
  126. log_text: "Post analysis completed. analyzed=#{state['analyzed_count']}, skipped=#{state['skipped_count']}, failed=#{state['failed_count']}."
  127. )
  # action_log is nil here when the failure happened before it was assigned
  # (e.g. the account or profile lookup raised), hence the safe navigation.
  128. rescue StandardError => e
  129. action_log&.mark_failed!(error_message: e.message, extra_metadata: { active_job_id: job_id, queue_name: queue_name })
  130. raise
  131. end
  132. private
  # Returns the profile's action log with id +profile_action_log_id+ when one
  # is given and found; otherwise creates a queued "analyze_profile_posts"
  # log stamped with this job's id and queue.
  def find_or_create_action_log(account:, profile:, profile_action_log_id:)
    existing = nil
    if profile_action_log_id.present?
      existing = profile.instagram_profile_action_logs.find_by(id: profile_action_log_id)
    end
    return existing if existing

    attributes = {
      instagram_account: account,
      action: "analyze_profile_posts",
      status: "queued",
      trigger_source: "job",
      occurred_at: Time.current,
      active_job_id: job_id,
      queue_name: queue_name,
      metadata: { created_by: self.class.name }
    }
    profile.instagram_profile_action_logs.create!(**attributes)
  end
  # Normalizes the requested post ids to unique positive integers.
  #
  # When no usable id is supplied, falls back to the ids of up to 200 posts
  # from the profile's pending_ai.recent_first scope.
  def normalize_post_ids(profile:, post_ids:)
    requested = Array(post_ids).map(&:to_i).select(&:positive?).uniq

    if requested.empty?
      profile.instagram_profile_posts.pending_ai.recent_first.limit(200).pluck(:id)
    else
      requested
    end
  end
  # Merges one batch's counters into the queue state stored on the action log
  # under metadata["analysis_queue_state"].
  #
  # Counters are added to the stored values, total_candidates keeps the larger
  # of the stored and supplied totals, remaining_count is replaced, and
  # failed_posts keeps the first 30 entries of stored-then-new failures.
  #
  # Returns a new Hash with String keys; nothing is written to the log here.
  def merged_queue_state(action_log:, total_candidates:, processed_increment:, analyzed_increment:, skipped_increment:, failed_rows:, remaining_count:)
    stored_metadata = action_log.metadata
    stored_metadata = {} unless stored_metadata.is_a?(Hash)
    previous = stored_metadata["analysis_queue_state"]
    previous = {} unless previous.is_a?(Hash)

    new_failures = Array(failed_rows)
    earlier_failures = Array(previous["failed_posts"]).select { |entry| entry.is_a?(Hash) }
    running_total = ->(key, increment) { previous[key].to_i + increment.to_i }

    {
      "total_candidates" => [previous["total_candidates"].to_i, total_candidates.to_i].max,
      "processed_count" => running_total.call("processed_count", processed_increment),
      "analyzed_count" => running_total.call("analyzed_count", analyzed_increment),
      "skipped_count" => running_total.call("skipped_count", skipped_increment),
      "failed_count" => previous["failed_count"].to_i + new_failures.length,
      "remaining_count" => remaining_count.to_i,
      "failed_posts" => (earlier_failures + new_failures).first(30),
      "updated_at" => Time.current.iso8601
    }
  end
  # Marks each of the profile's posts in +ids+ as skipped by the scan policy.
  # Best effort: a failure on one post is ignored and the loop continues.
  def mark_posts_as_policy_skipped!(profile:, ids:, decision:)
    valid_ids = Array(ids).map(&:to_i).select(&:positive?)
    posts = profile.instagram_profile_posts.where(id: valid_ids)

    posts.find_each do |post|
      begin
        Instagram::ProfileScanPolicy.mark_post_analysis_skipped!(post: post, decision: decision)
      rescue StandardError
        next
      end
    end
  end
  174. end

app/jobs/analyze_instagram_post_job.rb

0.0% lines covered

100.0% branches covered

133 relevant lines. 0 lines covered and 133 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. class AnalyzeInstagramPostJob < ApplicationJob
  2. require "base64"
  3. require "digest"
  4. require "uri"
  5. queue_as :ai
  6. MAX_INLINE_IMAGE_BYTES = 2 * 1024 * 1024
  7. MAX_INLINE_VIDEO_BYTES = 10 * 1024 * 1024
  # Runs AI analysis for one InstagramPost and stores the outcome on the post.
  #
  # On success the post becomes "analyzed", or "ignored" with a 24-hour
  # purge_at when the analysis does not flag it as relevant, and a notice is
  # broadcast to the account's "notifications" target. On any StandardError
  # the post is reset to "pending", an alert is broadcast when the account is
  # known, and the error is re-raised.
  def perform(instagram_post_id:)
    post = InstagramPost.find(instagram_post_id)
    account = post.instagram_account

    # Resolve an existing profile record for tag rules, if available.
    needs_profile = post.instagram_profile_id.nil? && post.author_username.to_s.strip.present?
    if needs_profile
      post.instagram_profile = account.instagram_profiles.find_by(username: post.author_username)
      post.save! if post.changed?
    end

    payload = build_payload(post)
    media = build_media_payload(post)
    runner = Ai::Runner.new(account: account)
    run = runner.analyze!(
      purpose: "post",
      analyzable: post,
      payload: payload,
      media: media,
      media_fingerprint: media_fingerprint_for(post: post, media: media)
    )

    result = run[:result]
    post.update!(
      status: "analyzed",
      analyzed_at: Time.current,
      ai_provider: run[:provider].key,
      ai_model: result[:model],
      analysis: result[:analysis]
    )

    relevant = ActiveModel::Type::Boolean.new.cast(post.analysis&.dig("relevant"))
    post.update!(status: "ignored", purge_at: 24.hours.from_now) unless relevant

    notice = "Post analyzed via #{run[:provider].display_name}: #{post.shortcode} (#{relevant ? 'relevant' : 'ignored'})."
    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: { kind: "notice", message: notice }
    )
  rescue StandardError => e
    # post/account are nil when the failure happened before they were assigned.
    post ||= InstagramPost.where(id: instagram_post_id).first
    account ||= post&.instagram_account
    post&.update!(status: "pending") # allow retry
    if account
      Turbo::StreamsChannel.broadcast_append_to(
        account,
        target: "notifications",
        partial: "shared/notification",
        locals: { kind: "alert", message: "Post analysis failed: #{e.message}" }
      )
    end
    raise
  end
  55. private
  # Assembles the analysis payload for +post+: the post's own fields, the
  # author's profile (nil when the post has no linked profile) and the fixed
  # tag-based rules.
  def build_payload(post)
    author = post.instagram_profile

    author_section =
      if author
        {
          username: author.username,
          display_name: author.display_name,
          bio: author.bio,
          tags: author.profile_tags.pluck(:name).sort,
          following: author.following,
          follows_you: author.follows_you,
          mutual: author.mutual?
        }
      end

    post_section = {
      shortcode: post.shortcode,
      kind: post.post_kind,
      author_username: post.author_username,
      caption: post.caption,
      taken_at: post.taken_at&.iso8601,
      detected_at: post.detected_at&.iso8601,
      permalink: post.permalink
    }

    # Basic tag-based gates. The AI should treat these as hard preferences.
    rules_section = {
      ignore_if_tagged: %w[relative page excluded],
      prefer_interact_if_tagged: %w[female_friend male_friend friend personal_user],
      require_manual_review: true
    }

    { post: post_section, author_profile: author_section, rules: rules_section }
  end
  # Describes the post's attached media for the AI runner.
  #
  # Returns a Hash whose :type is "image", "video" or "none":
  # - images up to MAX_INLINE_IMAGE_BYTES carry :bytes and a base64 :image_data_url;
  #   larger images carry only the content type;
  # - videos up to MAX_INLINE_VIDEO_BYTES carry :bytes and a :reference_id;
  #   larger videos become "none" with media_skipped_reason "video_too_large";
  # - missing, empty or other media becomes "none".
  # Any StandardError while building the payload also yields { type: "none" }.
  def build_media_payload(post)
    attachment = post.media
    return { type: "none" } unless attachment.attached?

    blob = attachment.blob
    return { type: "none" } unless blob

    mime = blob.content_type.to_s
    size = blob.byte_size.to_i
    return { type: "none", content_type: mime } unless size.positive?

    case mime
    when %r{\Aimage/}
      if size > MAX_INLINE_IMAGE_BYTES
        { type: "image", content_type: mime }
      else
        raw = blob.download
        {
          type: "image",
          content_type: mime,
          bytes: raw,
          image_data_url: "data:#{mime};base64,#{Base64.strict_encode64(raw)}"
        }
      end
    when %r{\Avideo/}
      if size > MAX_INLINE_VIDEO_BYTES
        { type: "none", content_type: mime, media_skipped_reason: "video_too_large" }
      else
        {
          type: "video",
          content_type: mime,
          reference_id: "instagram_post_#{post.id}",
          bytes: blob.download
        }
      end
    else
      { type: "none", content_type: mime }
    end
  rescue StandardError
    { type: "none" }
  end
  # Picks a stable fingerprint for the post's media, in order of preference:
  # "blob:<checksum>" of the attached blob, SHA-256 of the normalized media
  # URL, SHA-256 of the inline media bytes. Returns nil when none is available.
  def media_fingerprint_for(post:, media:)
    checksum = post.media.attached? ? post.media.blob&.checksum.to_s : nil
    return "blob:#{checksum}" if checksum.present?

    url = normalize_url(post.media_url)
    return Digest::SHA256.hexdigest(url) if url.present?

    raw = media[:bytes]
    raw.present? ? Digest::SHA256.hexdigest(raw) : nil
  end
  # Reduces a media URL to a stable form for fingerprinting.
  #
  # For http(s) URLs the result is scheme://host[:port]/path: query string and
  # fragment are dropped, and the port is kept only when it differs from the
  # scheme's default. Keeping a non-default port prevents media served from
  # different ports of the same host from sharing one fingerprint.
  #
  # raw - anything responding to to_s.
  #
  # Returns nil for blank input. Non-HTTP URLs and values that cannot be
  # parsed are returned stripped but otherwise unchanged.
  def normalize_url(raw)
    value = raw.to_s.strip
    return nil if value.blank?

    uri = URI.parse(value)
    # URI::HTTPS is a subclass of URI::HTTP, so this covers both schemes.
    return value unless uri.is_a?(URI::HTTP)

    authority = uri.host.to_s
    authority = "#{authority}:#{uri.port}" if uri.port && uri.port != uri.default_port
    "#{uri.scheme}://#{authority}#{uri.path}"
  rescue StandardError
    value
  end
  135. end

app/jobs/analyze_instagram_profile_job.rb

0.0% lines covered

100.0% branches covered

777 relevant lines. 0 lines covered and 777 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. class AnalyzeInstagramProfileJob < ApplicationJob
  2. require "base64"
  3. require "digest"
  4. queue_as :ai
  5. MAX_AI_IMAGE_COUNT = 5
  6. MAX_PROFILE_IMAGE_DESCRIPTION_COUNT = 5
  7. MAX_INLINE_IMAGE_BYTES = 2 * 1024 * 1024
  # Runs the profile-level AI analysis for one Instagram profile.
  #
  # instagram_account_id:  id of the InstagramAccount owning the profile.
  # instagram_profile_id:  id of the profile, looked up through the account.
  # profile_action_log_id: existing action log to continue; a new one is
  #                        created when it is absent or not found.
  #
  # Steps: mark the log running, stop early when the scan policy blocks the
  # profile, collect posts and comments, describe the first profile images,
  # run the "profile" analysis, update demographics, notify the account and
  # mark the log succeeded.
  #
  # On any StandardError the action log is marked failed first and the alert
  # is broadcast afterwards, so a failing broadcast can no longer prevent the
  # log from being marked failed. The error is then re-raised.
  def perform(instagram_account_id:, instagram_profile_id:, profile_action_log_id: nil)
    account = InstagramAccount.find(instagram_account_id)
    profile = account.instagram_profiles.find(instagram_profile_id)
    action_log = find_or_create_action_log(
      account: account,
      profile: profile,
      action: "analyze_profile",
      profile_action_log_id: profile_action_log_id
    )
    action_log.mark_running!(extra_metadata: { queue_name: queue_name, active_job_id: job_id })

    policy_decision = Instagram::ProfileScanPolicy.new(profile: profile).decision
    if policy_decision[:skip_post_analysis]
      # Only these two reason codes also flag the profile itself as scan-excluded.
      if %w[non_personal_profile_page scan_excluded_tag].include?(policy_decision[:reason_code].to_s)
        Instagram::ProfileScanPolicy.mark_scan_excluded!(profile: profile)
      end

      action_log.mark_succeeded!(
        extra_metadata: {
          skipped: true,
          reason: "profile_scan_policy_blocked",
          skip_reason_code: policy_decision[:reason_code],
          skip_reason: policy_decision[:reason],
          followers_count: policy_decision[:followers_count],
          max_followers: policy_decision[:max_followers]
        },
        log_text: "Skipped profile AI analysis: #{policy_decision[:reason]}"
      )
      return
    end

    collected = Instagram::ProfileAnalysisCollector.new(account: account, profile: profile).collect_and_persist!(
      posts_limit: nil,
      comments_limit: 20
    )
    described_posts = enrich_first_profile_images!(account: account, profile: profile, collected_posts: collected[:posts])
    accepted_media_context = build_accepted_media_context(profile: profile)

    payload = build_profile_payload(
      profile: profile,
      collected_posts: collected[:posts],
      described_posts: described_posts,
      accepted_media_context: accepted_media_context
    )
    media = build_media_inputs(profile: profile, collected_posts: described_posts)

    run = Ai::Runner.new(account: account).analyze!(
      purpose: "profile",
      analyzable: profile,
      payload: payload,
      media: media
    )

    update_profile_demographics_from_analysis!(profile: profile, analysis: run.dig(:result, :analysis))
    aggregate_demographics_from_accumulated_json!(
      account: account,
      profile: profile,
      latest_profile_analysis: run.dig(:result, :analysis),
      accepted_media_context: accepted_media_context
    )

    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: { kind: "notice", message: "AI analysis completed for #{profile.username} via #{run[:provider].display_name}." }
    )

    action_log.mark_succeeded!(
      extra_metadata: { provider: run[:provider].key, provider_name: run[:provider].display_name },
      log_text: "AI analysis completed via #{run[:provider].display_name}"
    )
  rescue StandardError => e
    # Persist the failure before notifying: the log entry is the durable
    # record, the broadcast is only a UI hint. account/action_log are nil when
    # the failure happened before they were assigned.
    action_log&.mark_failed!(error_message: e.message, extra_metadata: { active_job_id: job_id })
    if account
      Turbo::StreamsChannel.broadcast_append_to(
        account,
        target: "notifications",
        partial: "shared/notification",
        locals: { kind: "alert", message: "AI analysis failed: #{e.message}" }
      )
    end
    raise
  end
  82. private
  # Returns the profile's action log with id +profile_action_log_id+ when one
  # is given and found; otherwise creates a queued log for +action+ stamped
  # with this job's id and queue.
  def find_or_create_action_log(account:, profile:, action:, profile_action_log_id:)
    existing = nil
    if profile_action_log_id.present?
      existing = profile.instagram_profile_action_logs.find_by(id: profile_action_log_id)
    end
    return existing if existing

    attributes = {
      instagram_account: account,
      action: action,
      status: "queued",
      trigger_source: "job",
      occurred_at: Time.current,
      active_job_id: job_id,
      queue_name: queue_name,
      metadata: { created_by: self.class.name }
    }
    profile.instagram_profile_action_logs.create!(**attributes)
  end
  # Assembles the payload for the profile-level AI analysis.
  #
  # profile                - the profile being analyzed.
  # collected_posts        - captured posts; each contributes caption data and
  #                          its 10 most recent comments.
  # described_posts        - posts whose stored analysis supplies image
  #                          descriptions, topics and comment suggestions.
  # accepted_media_context - Hash read for :accepted_profile_posts and
  #                          :accepted_story_images.
  #
  # Also includes the profile's basic fields, its last 20 outgoing messages,
  # its last 100 activity events and the historical narrative text/chunks.
  def build_profile_payload(profile:, collected_posts:, described_posts:, accepted_media_context:)
    narrative_text = profile.history_narrative_text(max_chunks: 4)
    narrative_chunks = profile.history_narrative_chunks(max_chunks: 8)

    message_rows =
      profile.instagram_messages
        .where(direction: "outgoing")
        .order(created_at: :desc)
        .limit(20)
        .pluck(:body, :created_at, :sent_at, :status)
    outgoing_messages = message_rows.map do |body, created_at, sent_at, status|
      { body: body, created_at: created_at&.iso8601, sent_at: sent_at&.iso8601, status: status }
    end

    event_rows =
      profile.instagram_profile_events
        .order(detected_at: :desc, id: :desc)
        .limit(100)
        .pluck(:kind, :external_id, :occurred_at, :detected_at)
    activity_events = event_rows.map do |kind, external_id, occurred_at, detected_at|
      { kind: kind, external_id: external_id, occurred_at: occurred_at&.iso8601, detected_at: detected_at&.iso8601 }
    end

    post_summaries = Array(collected_posts).map do |post|
      {
        shortcode: post.shortcode,
        taken_at: post.taken_at&.iso8601,
        caption: post.caption,
        permalink: post.permalink_url,
        comments: post.instagram_profile_post_comments.recent_first.limit(10).map do |comment|
          {
            author_username: comment.author_username,
            body: comment.body,
            commented_at: comment.commented_at&.iso8601
          }
        end
      }
    end

    image_descriptions = Array(described_posts).map do |post|
      # A non-Hash analysis is treated as empty.
      stored_analysis = post.analysis.is_a?(Hash) ? post.analysis : {}
      {
        shortcode: post.shortcode,
        taken_at: post.taken_at&.iso8601,
        caption: post.caption,
        image_description: stored_analysis["image_description"].to_s.presence,
        topics: Array(stored_analysis["topics"]).first(10),
        comment_suggestions: Array(stored_analysis["comment_suggestions"]).first(5)
      }
    end

    accepted_inputs = {
      policy: "Only accepted images are used for combined demographic insights. Exclude deleted posts and skipped/duplicate story artifacts.",
      accepted_profile_posts: accepted_media_context[:accepted_profile_posts],
      accepted_story_images: accepted_media_context[:accepted_story_images],
      prompt_inputs: {
        combined_insights_required: [ "age_range", "gender_indicators", "location_signals" ],
        instruction: "Aggregate demographic evidence from accepted profile posts and accepted story images."
      }
    }

    {
      username: profile.username,
      ig_user_id: profile.ig_user_id,
      display_name: profile.display_name,
      bio: profile.bio,
      following: profile.following,
      follows_you: profile.follows_you,
      can_message: profile.can_message,
      restriction_reason: profile.restriction_reason,
      last_active_at: profile.last_active_at&.iso8601,
      last_story_seen_at: profile.last_story_seen_at&.iso8601,
      last_post_at: profile.last_post_at&.iso8601,
      recent_outgoing_messages: outgoing_messages,
      recent_activity_events: activity_events,
      captured_profile_posts: post_summaries,
      captured_profile_image_descriptions: image_descriptions,
      accepted_image_inputs: accepted_inputs,
      historical_narrative_text: narrative_text,
      historical_narrative_chunks: narrative_chunks
    }
  end
  # Builds the list of image inputs for the profile analysis.
  #
  # The first entry, when available, is the profile avatar: inlined from the
  # attachment, or referenced by profile_pic_url when no avatar is attached.
  # It is followed by inlined images from the first MAX_AI_IMAGE_COUNT
  # +collected_posts+. Posts without an attached, non-empty image blob, and
  # blobs larger than MAX_INLINE_IMAGE_BYTES, are left out.
  #
  # Each blob is downloaded once; the same bytes feed both the data URL and
  # the :bytes field.
  #
  # Returns an Array of Hashes with :type, :url and (for inlined images) :bytes.
  def build_media_inputs(profile:, collected_posts:)
    media = []

    if profile.avatar.attached?
      avatar_entry = inline_image_media(profile.avatar.blob)
      media << avatar_entry if avatar_entry
    elsif profile.profile_pic_url.to_s.strip.present?
      media << { type: "image", url: profile.profile_pic_url.to_s.strip }
    end

    Array(collected_posts).first(MAX_AI_IMAGE_COUNT).each do |post|
      next unless post.media.attached?

      blob = post.media.blob
      next if blob.nil? || blob.byte_size.to_i <= 0

      entry = inline_image_media(blob)
      media << entry if entry
    end

    media
  end

  # Downloads +blob+ once and returns an inline image entry
  # ({ type:, url: <data URL>, bytes: }), or nil when the blob is missing, is
  # not an image, exceeds MAX_INLINE_IMAGE_BYTES, or cannot be downloaded.
  def inline_image_media(blob)
    return nil unless blob
    return nil unless blob.content_type.to_s.start_with?("image/")
    return nil if blob.byte_size.to_i > MAX_INLINE_IMAGE_BYTES

    bytes = blob.download
    {
      type: "image",
      url: "data:#{blob.content_type};base64,#{Base64.strict_encode64(bytes)}",
      bytes: bytes
    }
  rescue StandardError
    nil
  end
  # Runs the per-post image description on the first
  # MAX_PROFILE_IMAGE_DESCRIPTION_COUNT posts that have media attached and
  # stores each result on its post. Face recognition and profile auto-tagging
  # run afterwards for every post that was updated.
  #
  # Best effort: a StandardError on one post is ignored and the loop continues.
  #
  # Returns the selected posts, whether or not their enrichment succeeded.
  def enrich_first_profile_images!(account:, profile:, collected_posts:)
    with_media = Array(collected_posts).select { |candidate| candidate.media.attached? }
    targets = with_media.first(MAX_PROFILE_IMAGE_DESCRIPTION_COUNT)

    targets.each do |post|
      begin
        described = run_post_image_description!(account: account, profile: profile, post: post)
        next unless described.is_a?(Hash)

        analysis_input = {
          "shortcode" => post.shortcode,
          "taken_at" => post.taken_at&.iso8601,
          "caption" => post.caption.to_s,
          "image_description" => described.dig("analysis", "image_description"),
          "topics" => Array(described.dig("analysis", "topics")).first(10),
          "comment_suggestions" => Array(described.dig("analysis", "comment_suggestions")).first(5)
        }
        post.update!(
          ai_status: "analyzed",
          analyzed_at: Time.current,
          ai_provider: described["provider"],
          ai_model: described["model"],
          analysis: described["analysis"],
          metadata: (post.metadata || {}).merge("analysis_input" => analysis_input)
        )

        PostFaceRecognitionService.new.process!(post: post)
        Ai::ProfileAutoTagger.sync_from_post_analysis!(profile: profile, analysis: described["analysis"])
      rescue StandardError
        next
      end
    end

    targets
  end
  # Runs the "post" AI analysis for a single profile post.
  #
  # The payload carries the post (with its 25 most recent comments), the
  # author's profile summary and the rules, including the profile's historical
  # narrative.
  #
  # Returns a Hash with String keys "provider", "model" and "analysis".
  def run_post_image_description!(account:, profile:, post:)
    narrative_text = profile.history_narrative_text(max_chunks: 3)
    narrative_chunks = profile.history_narrative_chunks(max_chunks: 6)

    post_section = {
      shortcode: post.shortcode,
      caption: post.caption,
      taken_at: post.taken_at&.iso8601,
      permalink: post.permalink_url,
      likes_count: post.likes_count,
      comments_count: post.comments_count,
      comments: post.instagram_profile_post_comments.recent_first.limit(25).map do |comment|
        {
          author_username: comment.author_username,
          body: comment.body,
          commented_at: comment.commented_at&.iso8601
        }
      end
    }
    author_section = {
      username: profile.username,
      display_name: profile.display_name,
      bio: profile.bio,
      can_message: profile.can_message,
      tags: profile.profile_tags.pluck(:name).sort
    }
    rules_section = {
      require_manual_review: true,
      style: "gen_z_light",
      historical_narrative_text: narrative_text,
      historical_narrative_chunks: narrative_chunks
    }

    runner = Ai::Runner.new(account: account)
    run = runner.analyze!(
      purpose: "post",
      analyzable: post,
      payload: { post: post_section, author_profile: author_section, rules: rules_section },
      media: build_post_media_payload(post),
      media_fingerprint: media_fingerprint_for(post)
    )

    {
      "provider" => run[:provider].key,
      "model" => run.dig(:result, :model),
      "analysis" => run.dig(:result, :analysis)
    }
  end
  # Describes a profile post's image for the AI runner.
  #
  # Returns { type: "none" } when no image blob is attached. Images larger
  # than MAX_INLINE_IMAGE_BYTES are referenced by the post's source media URL;
  # smaller ones are inlined as :bytes plus a base64 :image_data_url.
  # Any StandardError also yields { type: "none" }.
  def build_post_media_payload(post)
    attachment = post.media
    blob = attachment.attached? ? attachment.blob : nil
    return { type: "none" } unless blob&.content_type.to_s.start_with?("image/")

    mime = blob.content_type
    if blob.byte_size.to_i <= MAX_INLINE_IMAGE_BYTES
      raw = blob.download
      {
        type: "image",
        content_type: mime,
        bytes: raw,
        image_data_url: "data:#{mime};base64,#{Base64.strict_encode64(raw)}"
      }
    else
      { type: "image", content_type: mime, url: post.source_media_url.to_s }
    end
  rescue StandardError
    { type: "none" }
  end
  # Returns a string identifying the media of +post+, or nil when none can be
  # derived. Preference order: the stored media_url_fingerprint, then
  # "blob:<checksum>" of the attached blob, then the SHA-256 hex digest of the
  # source media URL.
  def media_fingerprint_for(post)
    stored = post.media_url_fingerprint.to_s
    return stored if stored.present?

    if post.media.attached?
      blob_checksum = post.media.blob&.checksum.to_s
      return "blob:#{blob_checksum}" if blob_checksum.present?
    end

    source_url = post.source_media_url.to_s
    source_url.present? ? Digest::SHA256.hexdigest(source_url) : nil
  end
  # Encodes an image blob as a base64 data URL.
  #
  # Returns nil when +blob+ is missing, is not an image, exceeds
  # MAX_INLINE_IMAGE_BYTES, or when reading it raises.
  def encode_blob_to_data_url(blob)
    inlineable = blob &&
      blob.content_type.to_s.start_with?("image/") &&
      blob.byte_size.to_i <= MAX_INLINE_IMAGE_BYTES
    return nil unless inlineable

    encoded = Base64.strict_encode64(blob.download)
    "data:#{blob.content_type};base64,#{encoded}"
  rescue StandardError
    nil
  end
  # Copies demographic fields from an AI analysis hash onto +profile+.
  #
  # Each of age, gender and location is taken from the first available of:
  # the analysis' estimates hash ("demographic_estimates", else
  # "self_declared"), the "self_declared" hash, and finally the text-based
  # inferred_*_from_text helpers. Confidences fall back to fixed defaults when
  # the analysis does not supply them. Any error is swallowed and nil returned.
  def update_profile_demographics_from_analysis!(profile:, analysis:)
    return unless analysis.is_a?(Hash)

    estimates =
      [ analysis["demographic_estimates"], analysis["self_declared"] ].find { |section| section.is_a?(Hash) } || {}

    updates = {
      ai_persona_summary: analysis["summary"].to_s.presence || profile.ai_persona_summary,
      ai_last_analyzed_at: Time.current
    }

    age = integer_or_nil(estimates["age"]) ||
      integer_or_nil(analysis.dig("self_declared", "age")) ||
      inferred_age_from_text(profile: profile, analysis: analysis)
    updates[:ai_estimated_age] = age if age.present?

    gender = estimates["gender"].to_s.strip
    gender = analysis.dig("self_declared", "gender").to_s.strip if gender.blank?
    gender = inferred_gender_from_text(profile: profile, analysis: analysis) if gender.blank?
    updates[:ai_estimated_gender] = gender if gender.present?

    location = estimates["location"].to_s.strip
    location = analysis.dig("self_declared", "location").to_s.strip if location.blank?
    location = inferred_location_from_text(profile: profile, analysis: analysis) if location.blank?
    updates[:ai_estimated_location] = location if location.present?

    # Default confidences apply only when a value was found but the analysis
    # gave no confidence for it.
    default_confidence = ->(value, fallback) { value.present? ? fallback : nil }
    updates[:ai_age_confidence] = float_or_nil(estimates["age_confidence"]) || default_confidence.call(age, 0.35)
    updates[:ai_gender_confidence] = float_or_nil(estimates["gender_confidence"]) || default_confidence.call(gender, 0.3)
    updates[:ai_location_confidence] = float_or_nil(estimates["location_confidence"]) || default_confidence.call(location, 0.25)

    profile.update!(updates)
  rescue StandardError
    nil
  end
  # Runs Ai::ProfileDemographicsAggregator over the dataset built for
  # +profile+ and persists the outcome: profile-level inference, per-post
  # inferences for profile posts and feed posts, the combined insight line,
  # and a "demographics_aggregated" profile event.
  #
  # Does nothing unless the aggregator returns a Hash with ok: true. Any error
  # is swallowed and nil returned.
  def aggregate_demographics_from_accumulated_json!(account:, profile:, latest_profile_analysis:, accepted_media_context:)
    dataset = build_demographics_dataset(
      profile: profile,
      latest_profile_analysis: latest_profile_analysis,
      accepted_media_context: accepted_media_context
    )
    outcome = Ai::ProfileDemographicsAggregator.new(account: account).aggregate!(dataset: dataset)
    return unless outcome.is_a?(Hash) && outcome[:ok] == true

    source_label = outcome[:source].to_s
    profile_inference = outcome[:profile_inference].is_a?(Hash) ? outcome[:profile_inference] : {}
    post_inferences = Array(outcome[:post_inferences]).select { |entry| entry.is_a?(Hash) }
    combined_insights = build_combined_prompt_insights(
      profile_inference: profile_inference,
      post_inferences: post_inferences,
      dataset: dataset
    )

    persist_profile_demographic_inference!(
      profile: profile,
      profile_inference: profile_inference,
      source: source_label,
      error: outcome[:error].to_s.presence
    )

    post_level_args = {
      profile: profile,
      profile_inference: profile_inference,
      post_inferences: post_inferences,
      source: source_label
    }
    persist_profile_post_demographic_inferences!(**post_level_args)
    persist_feed_post_demographic_inferences!(**post_level_args)
    persist_combined_prompt_insights!(profile: profile, combined_insights: combined_insights)

    pool = ->(key) { dataset.dig(:analysis_pool, key) }
    profile.record_event!(
      kind: "demographics_aggregated",
      external_id: "demographics_aggregated:#{Time.current.utc.iso8601(6)}",
      occurred_at: Time.current,
      metadata: {
        source: source_label,
        profile_inference: profile_inference,
        post_inferences_count: post_inferences.length,
        profile_dataset_rows: pool.call(:profile_rows_count),
        post_dataset_rows: pool.call(:post_rows_count),
        accepted_profile_images: pool.call(:accepted_profile_images_count),
        accepted_story_images: pool.call(:accepted_story_images_count),
        combined_prompt_insights: combined_insights,
        aggregator_error: outcome[:error].to_s.presence
      }
    )
  rescue StandardError
    nil
  end
  # Assembles the dataset handed to the demographics aggregator.
  #
  # Profile-level rows come from +latest_profile_analysis+, the 30 most recent
  # succeeded "profile" AI analyses, and the 20 newest profile insights.
  # Post-level rows come from accepted profile posts (up to 120), feed posts
  # for this profile (up to 120), and accepted story rows.
  #
  # Rows whose extraction yields nothing are skipped for every source,
  # including +latest_profile_analysis+, so profile_rows_count reflects only
  # rows that carry data. A non-Hash +accepted_media_context+ is treated as
  # empty (both counts 0).
  #
  # Returns a Hash with :profile and :analysis_pool sections.
  def build_demographics_dataset(profile:, latest_profile_analysis:, accepted_media_context:)
    media_context = accepted_media_context.is_a?(Hash) ? accepted_media_context : {}

    profile_analyses = [ latest_profile_analysis ]
    profile_analyses.concat(
      profile.ai_analyses.where(purpose: "profile", status: "succeeded").recent_first.limit(30).map(&:analysis)
    )
    profile_analyses.concat(
      profile.instagram_profile_insights.order(created_at: :desc).limit(20).map(&:raw_analysis)
    )
    profile_demographics = profile_analyses.filter_map do |analysis|
      extract_demographics_from_analysis(analysis).presence
    end

    # Over-fetch 220 posts because the acceptance filter runs in Ruby.
    profile_posts = profile.instagram_profile_posts.where.not(analysis: nil).recent_first.limit(220)
      .select { |post| accepted_profile_post?(post) }
      .first(120)
    feed_posts = profile.instagram_account.instagram_posts
      .where(instagram_profile_id: profile.id)
      .where.not(analysis: nil)
      .recent_first
      .limit(120)

    rows_for = lambda do |posts, source|
      posts.filter_map do |post|
        extracted = extract_demographics_from_analysis(post.analysis)
        extracted.merge(shortcode: post.shortcode, source: source) if extracted.present?
      end
    end

    post_demographics =
      rows_for.call(profile_posts, "instagram_profile_posts") +
      rows_for.call(feed_posts, "instagram_posts") +
      accepted_story_demographic_rows(profile: profile)

    {
      profile: {
        username: profile.username,
        display_name: profile.display_name,
        bio: profile.bio,
        current_demographics: {
          age: profile.ai_estimated_age,
          age_confidence: profile.ai_age_confidence,
          gender: profile.ai_estimated_gender,
          gender_confidence: profile.ai_gender_confidence,
          location: profile.ai_estimated_location,
          location_confidence: profile.ai_location_confidence
        }
      },
      analysis_pool: {
        profile_demographics: profile_demographics,
        post_demographics: post_demographics,
        profile_rows_count: profile_demographics.length,
        post_rows_count: post_demographics.length,
        accepted_profile_images_count: media_context[:accepted_profile_posts_count].to_i,
        accepted_story_images_count: media_context[:accepted_story_images_count].to_i
      }
    }
  end
  # Summarizes the media accepted for analysis of +profile+.
  #
  # Scans the 220 most recent profile posts and the 220 most recent story
  # archive events, keeps those passing accepted_profile_post? /
  # accepted_story_event?, and describes up to 40 of each.
  #
  # Returns a Hash with :accepted_profile_posts, :accepted_story_images and
  # their counts.
  def build_accepted_media_context(profile:)
    accepted_profile_posts =
      profile.instagram_profile_posts
        .recent_first
        .limit(220)
        .select { |post| accepted_profile_post?(post) }
        .first(40)
        .map do |post|
          analysis = post.analysis.is_a?(Hash) ? post.analysis : {}
          {
            source_ref: post.shortcode,
            source_type: "instagram_profile_post",
            taken_at: post.taken_at&.iso8601,
            # byteslice may cut a multibyte character (emoji, non-Latin text)
            # in half; scrub drops the dangling bytes so the caption stays
            # valid UTF-8.
            caption: post.caption.to_s.tr("\n", " ").byteslice(0, 260).scrub(""),
            image_description: analysis["image_description"].to_s.presence,
            relevant: analysis["relevant"],
            inferred_demographics: analysis["inferred_demographics"].is_a?(Hash) ? analysis["inferred_demographics"] : nil
          }
        end

    accepted_story_images =
      profile.instagram_profile_events
        .where(kind: InstagramProfileEvent::STORY_ARCHIVE_EVENT_KINDS)
        .with_attached_media
        .order(detected_at: :desc, id: :desc)
        .limit(220)
        .select { |event| accepted_story_event?(event) }
        .first(40)
        .map do |event|
          meta = event.metadata.is_a?(Hash) ? event.metadata : {}
          intel = meta["local_story_intelligence"].is_a?(Hash) ? meta["local_story_intelligence"] : {}
          {
            source_ref: meta["story_id"].to_s.presence || event.external_id.to_s,
            source_type: "instagram_story",
            taken_at: event.occurred_at&.iso8601 || event.detected_at&.iso8601,
            image_description: meta["ai_image_description"].to_s.presence,
            ocr_text: intel["ocr_text"].to_s.presence || meta["ocr_text"].to_s.presence,
            hashtags: Array(intel["hashtags"] || meta["hashtags"]).first(8),
            mentions: Array(intel["mentions"] || meta["mentions"]).first(8),
            objects: Array(intel["objects"] || meta["content_signals"]).first(10),
            relevant: true
          }
        end

    {
      accepted_profile_posts: accepted_profile_posts,
      accepted_story_images: accepted_story_images,
      accepted_profile_posts_count: accepted_profile_posts.length,
      accepted_story_images_count: accepted_story_images.length
    }
  end
  # Returns up to 120 demographic rows derived from the metadata of accepted
  # story archive events (newest first, scanning at most 220 events). Events
  # whose metadata yields no demographics are skipped.
  def accepted_story_demographic_rows(profile:)
    recent_story_events =
      profile.instagram_profile_events
        .where(kind: InstagramProfileEvent::STORY_ARCHIVE_EVENT_KINDS)
        .with_attached_media
        .order(detected_at: :desc, id: :desc)
        .limit(220)

    accepted_events = recent_story_events.select { |event| accepted_story_event?(event) }

    rows = accepted_events.filter_map do |event|
      story_meta = event.metadata.is_a?(Hash) ? event.metadata : {}
      demographics = extract_demographics_from_story_metadata(metadata: story_meta)
      next if demographics.blank?

      reference = story_meta["story_id"].to_s.presence || event.external_id.to_s
      demographics.merge(shortcode: reference, source: "instagram_stories", relevant: true)
    end

    rows.first(120)
  end
  # True when +post+ may be used as analysis input: it has a Hash analysis, is
  # not flagged deleted_from_source in its metadata, is not explicitly marked
  # irrelevant, and has either attached media or a source media URL.
  def accepted_profile_post?(post)
    return false unless post && post.analysis.is_a?(Hash)

    post_meta = post.metadata.is_a?(Hash) ? post.metadata : {}
    deleted = ActiveModel::Type::Boolean.new.cast(post_meta["deleted_from_source"])
    return false if deleted || post.analysis["relevant"] == false

    (post.media.attached? || post.source_media_url.to_s.present?) ? true : false
  end
  # True when a story event has attached media, is not flagged skipped, and
  # carries at least one content signal: local story intelligence, an AI image
  # description, OCR text, content signals, hashtags or mentions.
  def accepted_story_event?(event)
    return false unless event
    return false unless event.media.attached?

    story_meta = event.metadata.is_a?(Hash) ? event.metadata : {}
    return false if ActiveModel::Type::Boolean.new.cast(story_meta["skipped"])

    story_intel = story_meta["local_story_intelligence"]
    return true if story_intel.is_a?(Hash) && story_intel.present?

    %w[ai_image_description ocr_text].any? { |key| story_meta[key].to_s.present? } ||
      %w[content_signals hashtags mentions].any? { |key| Array(story_meta[key]).any? }
  end
  # Heuristically derives age, gender and location from a story event's
  # metadata (AI description, OCR text, transcript, hashtags, mentions and
  # location tags).
  #
  # Returns {} when none of the three values could be derived; otherwise a
  # compacted Hash with the values found, a fixed low confidence for each, and
  # a short evidence string built from the first three text parts.
  def extract_demographics_from_story_metadata(metadata:)
    raw = metadata.is_a?(Hash) ? metadata : {}
    intel = raw["local_story_intelligence"].is_a?(Hash) ? raw["local_story_intelligence"] : {}
    location_tags = Array(intel["location_tags"] || raw["location_tags"]).map(&:to_s).reject(&:blank?)

    text_parts = [
      raw["ai_image_description"].to_s,
      intel["ocr_text"].to_s,
      raw["ocr_text"].to_s,
      intel["transcript"].to_s,
      Array(intel["hashtags"] || raw["hashtags"]).join(" "),
      Array(intel["mentions"] || raw["mentions"]).join(" "),
      location_tags.join(" ")
    ].map(&:strip).reject(&:blank?)
    text = text_parts.join(" ").downcase

    age = text[/\b([1-7]\d)\s?(?:yo|yrs?|years?\s*old)\b/, 1]&.to_i

    gender =
      if text.match?(/\b(she\/her|she her|woman|girl|mrs|ms)\b/)
        "female"
      elsif text.match?(/\b(he\/him|he him|man|boy|mr)\b/)
        "male"
      elsif text.match?(/\b(they\/them|non[- ]?binary)\b/)
        "non-binary"
      end

    location = location_tags.first.to_s.presence
    # The leading \b keeps "in"/"from" from matching the tail of another word
    # (e.g. "captain america" must not yield the location "America").
    if location.blank? && (m = text.match(/\b(?:based in|from|in)\s+([a-z][a-z\s,.-]{2,40})/))
      location = m[1].to_s.split(/[|•]/).first.to_s.strip.titleize
    end

    # Normalize before assigning confidences so a placeholder such as
    # "Unknown" does not leave a confidence behind without a value.
    gender = normalize_unknown_string(gender)
    location = normalize_unknown_string(location)
    return {} if [ age, gender, location ].compact.empty?

    # byteslice can split a multibyte character; scrub removes the dangling
    # bytes so the evidence string stays valid UTF-8.
    evidence = text_parts.first(3).join(" | ").byteslice(0, 220).scrub("")

    {
      age: age,
      age_confidence: age ? 0.28 : nil,
      gender: gender,
      gender_confidence: gender ? 0.26 : nil,
      location: location,
      location_confidence: location ? 0.24 : nil,
      evidence: evidence.presence
    }.compact
  end
  # Condenses all demographic signals into a small summary hash: the min-max
  # age range, the three most frequent gender values, the five most frequent
  # location values, and the accepted image counts from the dataset.
  #
  # Ages come from the dataset's post rows plus the profile inference;
  # genders and locations additionally include +post_inferences+. Row values
  # are read by symbol key first, then by string key.
  def build_combined_prompt_insights(profile_inference:, post_inferences:, dataset:)
    pool_rows = Array(dataset.dig(:analysis_pool, :post_demographics))
    inferred_rows = Array(post_inferences)
    read = ->(entry, key) { entry[key] || entry[key.to_s] }

    ages = pool_rows.map { |row| integer_or_nil(read.call(row, :age)) }
    ages.push(integer_or_nil(profile_inference[:age]))
    ages.compact!

    most_frequent = lambda do |key, limit|
      values = pool_rows.map { |row| normalize_unknown_string(read.call(row, key)) }.compact
      values << normalize_unknown_string(profile_inference[key])
      values.concat(inferred_rows.map { |row| normalize_unknown_string(read.call(row, key)) })
      values.compact
        .group_by(&:itself)
        .sort_by { |_value, bucket| -bucket.length }
        .first(limit)
        .map(&:first)
    end

    {
      age_range: (ages.any? ? "#{ages.min}-#{ages.max}" : nil),
      gender_indicators: most_frequent.call(:gender, 3),
      location_signals: most_frequent.call(:location, 5),
      accepted_profile_images_count: dataset.dig(:analysis_pool, :accepted_profile_images_count).to_i,
      accepted_story_images_count: dataset.dig(:analysis_pool, :accepted_story_images_count).to_i
    }.compact
  end
  # Writes a one-line "Combined insights | ..." digest into the profile's
  # ai_persona_summary.
  #
  # Any digest line left by an earlier run is replaced rather than appended
  # to, so repeated aggregations do not grow the summary without bound.
  # Nothing is written when +combined_insights+ is not a Hash or carries no
  # signal beyond the two accepted-image counters. Errors are swallowed and
  # nil returned.
  def persist_combined_prompt_insights!(profile:, combined_insights:)
    return unless combined_insights.is_a?(Hash)

    counters = %i[accepted_profile_images_count accepted_story_images_count]
    signals = combined_insights.reject { |key, _value| counters.include?(key) }
    return if signals.values.all?(&:blank?)

    prefix = "Combined insights"
    line = [
      prefix,
      "age_range=#{combined_insights[:age_range]}",
      "gender_indicators=#{Array(combined_insights[:gender_indicators]).join(', ')}",
      "location_signals=#{Array(combined_insights[:location_signals]).join(', ')}",
      "accepted_posts=#{combined_insights[:accepted_profile_images_count].to_i}",
      "accepted_stories=#{combined_insights[:accepted_story_images_count].to_i}"
    ].join(" | ")

    # Keep everything already in the summary except stale digest lines.
    kept_lines = profile.ai_persona_summary.to_s
      .lines(chomp: true)
      .reject { |existing| existing.start_with?("#{prefix} | ") }

    profile.update!(ai_persona_summary: (kept_lines + [ line ]).join("\n"))
  rescue StandardError
    nil
  end
  # Pulls age, gender, location, their confidences and evidence out of an AI
  # analysis hash.
  #
  # Values are taken from the first of "demographic_estimates",
  # "self_declared" and "inferred_demographics" that provides one;
  # confidences are read from "demographic_estimates" then
  # "inferred_demographics" only. Returns {} for non-Hash input; nil entries
  # are dropped from the result.
  def extract_demographics_from_analysis(analysis)
    return {} unless analysis.is_a?(Hash)

    section = ->(key) { analysis[key].is_a?(Hash) ? analysis[key] : {} }
    estimates = section.call("demographic_estimates")
    declared = section.call("self_declared")
    inferred = section.call("inferred_demographics")

    value_sources = [ estimates, declared, inferred ]
    confidence_sources = [ estimates, inferred ]

    first_text = ->(field) { value_sources.filter_map { |src| src[field].to_s.strip.presence }.first }
    first_confidence = ->(field) { confidence_sources.filter_map { |src| float_or_nil(src[field]) }.first }

    {
      age: value_sources.filter_map { |src| integer_or_nil(src["age"]) }.first,
      age_confidence: first_confidence.call("age_confidence"),
      gender: normalize_unknown_string(first_text.call("gender")),
      gender_confidence: first_confidence.call("gender_confidence"),
      location: normalize_unknown_string(first_text.call("location")),
      location_confidence: first_confidence.call("location_confidence"),
      evidence: analysis["evidence"].to_s.presence || estimates["evidence"].to_s.presence
    }.compact
  end
  # Applies an aggregated profile-level inference to +profile+.
  #
  # For each of age, gender and location the stored value is replaced only
  # when should_replace_value? agrees; the confidence is written alongside
  # when the inference supplies one. Evidence, rationale and any aggregator
  # error are appended to ai_persona_summary. +source+ is accepted but not
  # used here.
  def persist_profile_demographic_inference!(profile:, profile_inference:, source:, error:)
    updates = { ai_last_analyzed_at: Time.current }

    # [value column, confidence column, current value, candidate, raw candidate confidence]
    candidates = [
      [
        :ai_estimated_age, :ai_age_confidence,
        profile.ai_estimated_age,
        integer_or_nil(profile_inference[:age]),
        profile_inference[:age_confidence]
      ],
      [
        :ai_estimated_gender, :ai_gender_confidence,
        normalize_unknown_string(profile.ai_estimated_gender),
        normalize_unknown_string(profile_inference[:gender]),
        profile_inference[:gender_confidence]
      ],
      [
        :ai_estimated_location, :ai_location_confidence,
        normalize_unknown_string(profile.ai_estimated_location),
        normalize_unknown_string(profile_inference[:location]),
        profile_inference[:location_confidence]
      ]
    ]

    candidates.each do |value_column, confidence_column, current, candidate, raw_confidence|
      candidate_confidence = float_or_nil(raw_confidence)
      replace = should_replace_value?(
        current: current,
        candidate: candidate,
        current_confidence: profile.public_send(confidence_column),
        candidate_confidence: candidate_confidence
      )
      next unless replace

      updates[value_column] = candidate
      updates[confidence_column] = candidate_confidence if candidate_confidence
    end

    notes = [ profile_inference[:evidence].to_s, profile_inference[:why].to_s, error.to_s ].reject(&:blank?).join(" | ")
    if notes.present?
      updates[:ai_persona_summary] = [ profile.ai_persona_summary.to_s.presence, notes ].compact.join("\n")
    end

    return unless updates.keys.length > 1 || updates[:ai_persona_summary].present?

    profile.update!(updates)
  end
  # Enriches the 220 most recent profile posts that pass
  # accepted_profile_post? with demographics, using the post-specific
  # inference (matched by shortcode) when one exists.
  def persist_profile_post_demographic_inferences!(profile:, profile_inference:, post_inferences:, source:)
    hints_by_shortcode = post_inferences.index_by { |entry| entry[:shortcode].to_s }

    profile.instagram_profile_posts.recent_first.limit(220).each do |post|
      if accepted_profile_post?(post)
        enrich_post_demographics!(
          record: post,
          profile_inference: profile_inference,
          post_hint: hints_by_shortcode[post.shortcode.to_s],
          source: source
        )
      end
    end
  end
  # Enriches the 150 most recent feed posts belonging to +profile+ with
  # demographics, skipping posts whose analysis marks them irrelevant. The
  # post-specific inference (matched by shortcode) is used when one exists.
  def persist_feed_post_demographic_inferences!(profile:, profile_inference:, post_inferences:, source:)
    hints_by_shortcode = post_inferences.index_by { |entry| entry[:shortcode].to_s }
    feed_posts = profile.instagram_account.instagram_posts.where(instagram_profile_id: profile.id)

    feed_posts.recent_first.limit(150).each do |post|
      post_analysis = post.analysis.is_a?(Hash) ? post.analysis : {}
      unless post_analysis["relevant"] == false
        enrich_post_demographics!(
          record: post,
          profile_inference: profile_inference,
          post_hint: hints_by_shortcode[post.shortcode.to_s],
          source: source
        )
      end
    end
  end
  # Fills missing "inferred_demographics" fields (age, gender, location) in a
  # record's analysis from the post-level hint, falling back to the profile
  # inference. Existing values are never overwritten.
  #
  # The record is saved only when at least one field was filled; bookkeeping
  # keys (confidence, relevant, source, updated_at, evidence) are refreshed at
  # the same time. Errors are swallowed and nil returned.
  def enrich_post_demographics!(record:, profile_inference:, post_hint:, source:)
    analysis = record.analysis.is_a?(Hash) ? record.analysis.deep_dup : {}
    inferred = analysis["inferred_demographics"].is_a?(Hash) ? analysis["inferred_demographics"].deep_dup : {}
    hinted = ->(key) { post_hint&.dig(key) }
    boolean = ActiveModel::Type::Boolean.new

    relevant = boolean.cast(hinted.call(:relevant)) || boolean.cast(analysis["relevant"])
    age = integer_or_nil(hinted.call(:age)) || integer_or_nil(profile_inference[:age])
    gender = normalize_unknown_string(hinted.call(:gender)) || normalize_unknown_string(profile_inference[:gender])
    location = normalize_unknown_string(hinted.call(:location)) || normalize_unknown_string(profile_inference[:location])
    confidence = float_or_nil(hinted.call(:confidence)) || float_or_nil(profile_inference[:age_confidence]) || 0.3

    # field => [what is stored now, what we could fill in]
    fills = {
      "age" => [ inferred["age"], age ],
      "gender" => [ normalize_unknown_string(inferred["gender"]), gender ],
      "location" => [ normalize_unknown_string(inferred["location"]), location ]
    }.select { |_field, (existing, candidate)| existing.blank? && candidate.present? }
    return if fills.empty?

    fills.each { |field, (_existing, candidate)| inferred[field] = candidate }

    inferred["confidence"] = confidence
    %w[age_confidence gender_confidence location_confidence].each do |field|
      inferred[field] = float_or_nil(profile_inference[field.to_sym]) if inferred[field].blank?
    end
    inferred["relevant"] = relevant
    inferred["source"] = source.to_s.presence || "json_aggregator"
    inferred["updated_at"] = Time.current.utc.iso8601(3)
    inferred["evidence"] = hinted.call(:evidence).to_s.presence || profile_inference[:evidence].to_s.presence

    analysis["inferred_demographics"] = inferred
    record.update!(analysis: analysis)
  rescue StandardError
    nil
  end
  # Decides whether +candidate+ should overwrite +current+.
  #
  # A blank candidate never wins. A blank or placeholder ("unknown" etc.)
  # current value always loses. Otherwise the candidate must beat the current
  # confidence by more than 0.1 (missing confidences count as 0.0).
  def should_replace_value?(current:, candidate:, current_confidence:, candidate_confidence:)
    return false if candidate.blank?
    return true if current.blank? || normalize_unknown_string(current).blank?

    required = float_or_nil(current_confidence).to_f + 0.1
    float_or_nil(candidate_confidence).to_f > required
  end
  # Returns +value+ as a stripped string, or nil when it is blank or one of
  # the placeholders "unknown", "n/a", "none", "null" (case-insensitive).
  def normalize_unknown_string(value)
    cleaned = value.to_s.strip
    placeholder = cleaned.blank? || %w[unknown n/a none null].include?(cleaned.downcase)
    placeholder ? nil : cleaned
  end
  # Guesses an age from keywords in the profile bio and analysis summary:
  # 21 for student/college terms, 17 for high-school terms, 34 for parent
  # terms, and 26 when nothing matches. Always returns an Integer.
  def inferred_age_from_text(profile:, analysis:)
    haystack = [ profile.bio.to_s, analysis["summary"].to_s ].join(" ").downcase

    case haystack
    when /\b(student|college|university|campus|undergrad)\b/ then 21
    when /\b(high school|school life|class of 20\d{2})\b/ then 17
    when /\b(mom|dad|parent)\b/ then 34
    else 26
    end
  end
  # Guesses a gender label from pronouns and gendered words in the profile
  # bio and analysis summary. Female markers are checked first, then male,
  # then non-binary; returns "unknown" when nothing matches.
  def inferred_gender_from_text(profile:, analysis:)
    haystack = [ profile.bio.to_s, analysis["summary"].to_s ].join(" ").downcase

    case haystack
    when /\b(she\/her|she her|woman|girl|mrs|ms)\b/ then "female"
    when /\b(he\/him|he him|man|boy|mr)\b/ then "male"
    when /\b(they\/them|non[- ]?binary)\b/ then "non-binary"
    else "unknown"
    end
  end
  # Guesses a location from the profile bio, analysis summary and listed
  # languages.
  #
  # An explicit marker ("📍", "based in", "from") followed by a place wins and
  # is returned titleized. Otherwise language/country keywords map to
  # "United States" or "India". Returns "unknown" when nothing matches.
  # NOTE(review): the text is lowercased first, so the "us" keyword also
  # matches the pronoun "us".
  def inferred_location_from_text(profile:, analysis:)
    language_names = Array(analysis["languages"]).map { |entry| entry.is_a?(Hash) ? entry["language"] : entry }
    haystack = [ profile.bio.to_s, analysis["summary"].to_s, language_names.join(" ") ].join(" ").downcase

    explicit = haystack.match(/(?:📍|based in|from)\s+([a-z][a-z\s,.-]{2,40})/)
    return explicit[1].to_s.split(/[|•]/).first.to_s.strip.titleize if explicit

    case haystack
    when /\b(english|usa|us)\b/ then "United States"
    when /\b(hindi|india|indian)\b/ then "India"
    else "unknown"
    end
  end
  # Converts +value+ to an Integer, or returns nil when it is blank or not a
  # valid integer.
  #
  # Strings are always parsed in base 10, so zero-padded input such as "08"
  # is read as 8 instead of being rejected as invalid octal, and "010" is 10
  # rather than 8. Non-string values go through Kernel#Integer as before
  # (floats are truncated).
  def integer_or_nil(value)
    return nil if value.blank?
    return Integer(value, 10, exception: false) if value.is_a?(String)

    Integer(value, exception: false)
  end
  # Converts +value+ to a Float, or returns nil when it is blank or cannot be
  # parsed as a number.
  def float_or_nil(value)
    value.blank? ? nil : Float(value)
  rescue StandardError
    nil
  end
  777. end

app/jobs/analyze_instagram_profile_post_job.rb

0.0% lines covered

100.0% branches covered

363 relevant lines. 0 lines covered and 363 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. class AnalyzeInstagramProfilePostJob < ApplicationJob
  2. queue_as :ai_visual_queue
  # Reason codes marking a profile as not yet ready. Reuses the list from
  # ProcessPostMetadataTaggingJob when that constant is defined; otherwise
  # falls back to the literal list below.
  PROFILE_INCOMPLETE_REASON_CODES =
    if defined?(ProcessPostMetadataTaggingJob::PROFILE_INCOMPLETE_REASON_CODES)
      ProcessPostMetadataTaggingJob::PROFILE_INCOMPLETE_REASON_CODES
    else
      [
        "latest_posts_not_analyzed",
        "insufficient_analyzed_posts",
        "no_recent_posts_available",
        "missing_structured_post_signals",
        "profile_preparation_failed",
        "profile_preparation_error"
      ].freeze
    end

  # Read from POST_COMMENT_RETRY_MAX_ATTEMPTS (default 3), limited to 1..10.
  COMMENT_RETRY_MAX_ATTEMPTS = ENV.fetch("POST_COMMENT_RETRY_MAX_ATTEMPTS") { 3 }.to_i.clamp(1, 10)

  # Every pipeline task is enabled by default.
  DEFAULT_TASK_FLAGS = %i[
    analyze_visual
    analyze_faces
    run_ocr
    run_video
    run_metadata
    generate_comments
    enforce_comment_evidence_policy
    retry_on_incomplete_profile
  ].to_h { |flag| [ flag, true ] }.freeze
  # Entry point: analyzes one profile post.
  #
  # Loads the account, profile and post, then consults
  # Instagram::ProfileScanPolicy. When the policy says to skip, the post is
  # marked skipped (and the profile marked scan-excluded for the two
  # exclusion reason codes) and the job returns. Otherwise the task flags are
  # resolved and the work runs either inline (pipeline_mode "inline") or via
  # the orchestrated step pipeline.
  #
  # On any StandardError the post is flagged "failed" and an alert is
  # broadcast to the account, both as best effort, and the original exception
  # is then re-raised. A failure inside that cleanup must not replace the
  # error that actually caused the job to fail.
  def perform(
    instagram_account_id:,
    instagram_profile_id:,
    instagram_profile_post_id:,
    task_flags: {},
    pipeline_mode: "async"
  )
    account = InstagramAccount.find(instagram_account_id)
    profile = account.instagram_profiles.find(instagram_profile_id)
    post = profile.instagram_profile_posts.find(instagram_profile_post_id)

    policy_decision = Instagram::ProfileScanPolicy.new(profile: profile).decision
    if policy_decision[:skip_post_analysis]
      if %w[non_personal_profile_page scan_excluded_tag].include?(policy_decision[:reason_code].to_s)
        Instagram::ProfileScanPolicy.mark_scan_excluded!(profile: profile)
      end
      Instagram::ProfileScanPolicy.mark_post_analysis_skipped!(post: post, decision: policy_decision)
      return
    end

    resolved_flags = resolve_task_flags(post: post, task_flags: task_flags)
    if pipeline_mode.to_s == "inline"
      perform_inline(
        account: account,
        profile: profile,
        post: post,
        task_flags: resolved_flags
      )
      return
    end

    start_orchestrated_pipeline!(
      account: account,
      profile: profile,
      post: post,
      task_flags: resolved_flags
    )
  rescue StandardError => e
    # `account` and `post` are nil here when the failure happened before they
    # were assigned, so plain nil checks are sufficient.
    begin
      post.update!(ai_status: "failed") if post&.persisted?
    rescue StandardError
      nil # best effort: keep the original error
    end

    begin
      if account
        Turbo::StreamsChannel.broadcast_append_to(
          account,
          target: "notifications",
          partial: "shared/notification",
          locals: { kind: "alert", message: "Profile post analysis failed: #{e.message}" }
        )
      end
    rescue StandardError
      nil # best effort: keep the original error
    end

    raise e
  end
  71. private
  # Starts a pipeline run for +post+: records the run in
  # Ai::PostAnalysisPipelineState, logs "ai.pipeline.started", enqueues the
  # visual, face, ocr and video step jobs (each only if required for this
  # run), and finally enqueues FinalizePostAnalysisPipelineJob.
  def start_orchestrated_pipeline!(account:, profile:, post:, task_flags:)
    pipeline_state = Ai::PostAnalysisPipelineState.new(post: post)
    run_id = pipeline_state.start!(task_flags: task_flags, source_job: self.class.name)
    required_steps = pipeline_state.required_steps(run_id: run_id)

    record_ids = {
      instagram_account_id: account.id,
      instagram_profile_id: profile.id,
      instagram_profile_post_id: post.id
    }

    Ops::StructuredLogger.info(
      event: "ai.pipeline.started",
      payload: { active_job_id: job_id }
        .merge(record_ids)
        .merge(pipeline_run_id: run_id, required_steps: required_steps, task_flags: task_flags)
    )

    step_jobs = [
      [ "visual", ProcessPostVisualAnalysisJob ],
      [ "face", ProcessPostFaceAnalysisJob ],
      [ "ocr", ProcessPostOcrAnalysisJob ],
      [ "video", ProcessPostVideoAnalysisJob ]
    ]
    step_jobs.each do |step_name, step_job_class|
      enqueue_step_job!(
        step: step_name,
        job_class: step_job_class,
        account: account,
        profile: profile,
        post: post,
        run_id: run_id,
        pipeline_state: pipeline_state
      )
    end

    FinalizePostAnalysisPipelineJob.perform_later(**record_ids, pipeline_run_id: run_id, attempts: 0)
  end
  # Enqueues one pipeline step job and records it as queued.
  #
  # Does nothing when +step+ is not among the run's required steps. On
  # success the step is marked queued in +pipeline_state+ and
  # "ai.pipeline.step_enqueued" is logged. Any StandardError raised along the
  # way marks the step as failed with reason "enqueue_failed" and logs
  # "ai.pipeline.step_enqueue_failed" instead of propagating.
  def enqueue_step_job!(step:, job_class:, account:, profile:, post:, run_id:, pipeline_state:)
    return unless pipeline_state.required_steps(run_id: run_id).include?(step)

    job = job_class.perform_later(
      instagram_account_id: account.id,
      instagram_profile_id: profile.id,
      instagram_profile_post_id: post.id,
      pipeline_run_id: run_id
    )
    pipeline_state.mark_step_queued!(
      run_id: run_id,
      step: step,
      queue_name: job.queue_name,
      active_job_id: job.job_id,
      result: {
        enqueued_by: self.class.name,
        enqueued_at: Time.current.iso8601(3)
      }
    )
    Ops::StructuredLogger.info(
      event: "ai.pipeline.step_enqueued",
      payload: {
        active_job_id: job_id,
        instagram_account_id: account.id,
        instagram_profile_id: profile.id,
        instagram_profile_post_id: post.id,
        pipeline_run_id: run_id,
        step: step,
        queue_name: job.queue_name,
        enqueued_job_id: job.job_id
      }
    )
  rescue StandardError => e
    pipeline_state.mark_step_completed!(
      run_id: run_id,
      step: step,
      status: "failed",
      error: "enqueue_failed: #{e.class}: #{e.message}",
      result: {
        reason: "enqueue_failed"
      }
    )
    Ops::StructuredLogger.warn(
      event: "ai.pipeline.step_enqueue_failed",
      payload: {
        active_job_id: job_id,
        instagram_account_id: account.id,
        instagram_profile_id: profile.id,
        instagram_profile_post_id: post.id,
        pipeline_run_id: run_id,
        step: step,
        error_class: e.class.name,
        # byteslice can cut a multibyte character in half; scrub drops the
        # dangling bytes so the logged message stays valid UTF-8.
        error_message: e.message.to_s.byteslice(0, 280).scrub("")
      }
    )
  end
  # Runs the requested analysis tasks for +post+ synchronously, in this order:
  # visual analysis, face recognition, metadata tagging, comment generation.
  # Afterwards the post is flagged "analyzed" (if it is not already) and a
  # notification is appended to the account's Turbo stream.
  #
  # task_flags - Hash read with symbol keys (:analyze_visual, :analyze_faces,
  #              :run_metadata, :generate_comments,
  #              :enforce_comment_evidence_policy,
  #              :retry_on_incomplete_profile).
  def perform_inline(account:, profile:, post:, task_flags:)
    context_builder = Ai::PostAnalysisContextBuilder.new(profile: profile, post: post)

    if task_flags[:analyze_visual]
      analysis_payload = context_builder.payload
      media_payload = context_builder.media_payload
      analysis_run = Ai::Runner.new(account: account).analyze!(
        purpose: "post",
        analyzable: post,
        payload: analysis_payload,
        media: media_payload,
        media_fingerprint: context_builder.media_fingerprint(media: media_payload),
        provider_options: inline_provider_options(task_flags: task_flags)
      )
      post.update!(
        ai_status: "analyzed",
        analyzed_at: Time.current,
        ai_provider: analysis_run[:provider].key,
        ai_model: analysis_run.dig(:result, :model),
        analysis: analysis_run.dig(:result, :analysis)
      )
    end

    if task_flags[:analyze_faces]
      merge_face_summary!(
        post: post,
        face_recognition_result: PostFaceRecognitionService.new.process!(post: post)
      )
    end

    if task_flags[:run_metadata]
      Ai::ProfileAutoTagger.sync_from_post_analysis!(
        profile: profile,
        analysis: post.analysis.is_a?(Hash) ? post.analysis : {}
      )
    end

    comment_outcome = nil
    if task_flags[:generate_comments]
      enforce_evidence = ActiveModel::Type::Boolean.new.cast(task_flags[:enforce_comment_evidence_policy])
      comment_outcome = Ai::PostCommentGenerationService.new(
        account: account,
        profile: profile,
        post: post,
        enforce_required_evidence: enforce_evidence
      ).run!
      # Re-read the post before inspecting its metadata for the retry decision.
      post.reload
      retry_requested = ActiveModel::Type::Boolean.new.cast(task_flags[:retry_on_incomplete_profile])
      if retry_requested && retryable_profile_incomplete_block?(post: post, comment_result: comment_outcome)
        enqueue_build_history_retry_if_needed!(account: account, profile: profile, post: post)
      end
    end

    post.update!(ai_status: "analyzed", analyzed_at: Time.current) if post.ai_status.to_s != "analyzed"

    awaiting_history = comment_outcome&.dig(:reason_code).to_s == "missing_required_evidence"
    notification_message =
      if awaiting_history
        "Profile post analyzed: #{post.shortcode}. Waiting for Build History to finish comment generation."
      else
        "Profile post analyzed: #{post.shortcode}."
      end

    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: { kind: "notice", message: notification_message }
    )
  end
  # Writes a compact "face_summary" entry into +post.analysis+, derived from
  # +post.metadata["face_recognition"]+.
  #
  # Best effort: any StandardError is swallowed and nil is returned, so a
  # failure here never interrupts the caller.
  #
  # post                    - record exposing +analysis+, +metadata+, +update!+.
  # face_recognition_result - result of the face recognition step; only its
  #                           +:reason+ is read, as a fallback for
  #                           "detection_source". May be nil.
  def merge_face_summary!(post:, face_recognition_result:)
    analysis = post.analysis.is_a?(Hash) ? post.analysis.deep_dup : {}
    face_meta = post.metadata.is_a?(Hash) ? post.metadata.dig("face_recognition") : nil
    face_meta = {} unless face_meta.is_a?(Hash)

    # Ignore rows that are not hashes: a single malformed entry would
    # otherwise raise inside the blanket rescue below and silently discard
    # the whole summary.
    matched_people = Array(face_meta["matched_people"]).select { |row| row.is_a?(Hash) }
    boolean = ActiveModel::Type::Boolean.new

    # Only consult the service result when it can be indexed (nil cannot).
    fallback_source =
      face_recognition_result.respond_to?(:[]) ? face_recognition_result[:reason].to_s.presence : nil

    analysis["face_summary"] = {
      "face_count" => face_meta["face_count"].to_i,
      # Rows may carry either string or symbol keys.
      "owner_faces_count" => matched_people.count { |row| boolean.cast(row["owner_match"] || row[:owner_match]) },
      "recurring_faces_count" => matched_people.count { |row| boolean.cast(row["recurring_face"] || row[:recurring_face]) },
      "detection_source" => face_meta["detection_source"].to_s.presence || fallback_source,
      "participant_summary" => face_meta["participant_summary"].to_s.presence,
      "detection_reason" => face_meta["detection_reason"].to_s.presence,
      "detection_error" => face_meta["detection_error"].to_s.presence
    }.compact # drop keys with no value
    post.update!(analysis: analysis)
  rescue StandardError
    nil
  end
  # Builds the effective task flags for a run: starts from a copy of
  # DEFAULT_TASK_FLAGS, overlays boolean-cast values from +task_flags+ for
  # keys that exist in the defaults, and forces :run_video off unless the
  # post has attached media whose content type starts with "video/".
  #
  # Returns the resolved flags Hash (symbol keys).
  def resolve_task_flags(post:, task_flags:)
    resolved = DEFAULT_TASK_FLAGS.deep_dup
    overrides = task_flags.is_a?(Hash) ? task_flags : {}

    overrides.each do |raw_key, raw_value|
      # Accept string keys and non-snake-case spellings of known flags.
      flag_name = raw_key.to_s.underscore.to_sym
      resolved[flag_name] = ActiveModel::Type::Boolean.new.cast(raw_value) if resolved.key?(flag_name)
    end

    has_video = post.media.attached? && post.media.blob&.content_type.to_s.start_with?("video/")
    resolved[:run_video] = false unless has_video
    resolved
  end
  # Translates task flags into the provider options Hash used for the inline
  # visual analysis call. :visual_only and :include_comment_generation are
  # always false; the remaining options are boolean-cast from +task_flags+.
  def inline_provider_options(task_flags:)
    to_bool = ->(flag) { ActiveModel::Type::Boolean.new.cast(task_flags[flag]) }

    options = { visual_only: false }
    options[:include_faces] = to_bool.call(:analyze_faces)
    options[:include_ocr] = to_bool.call(:run_ocr)
    options[:include_comment_generation] = false
    options[:include_video_analysis] = to_bool.call(:run_video)
    options
  end
  # True when comment generation was blocked for "missing_required_evidence"
  # and the post's stored comment_generation_policy says history is not ready
  # for one of PROFILE_INCOMPLETE_REASON_CODES. Returns false otherwise,
  # including when anything raises while inspecting the inputs.
  def retryable_profile_incomplete_block?(post:, comment_result:)
    boolean = ActiveModel::Type::Boolean.new

    blocked_for_evidence =
      boolean.cast(comment_result[:blocked]) &&
      comment_result[:reason_code].to_s == "missing_required_evidence"
    return false unless blocked_for_evidence

    policy = post.metadata.is_a?(Hash) ? post.metadata["comment_generation_policy"] : nil
    return false if !policy.is_a?(Hash) || boolean.cast(policy["history_ready"])

    PROFILE_INCOMPLETE_REASON_CODES.include?(policy["history_reason_code"].to_s)
  rescue StandardError
    false
  end
  # Registers a Build History run that will re-run this job (comment
  # generation only) once history is ready, and records the retry bookkeeping
  # under post.metadata["comment_generation_policy"]["retry_state"].
  #
  # Returns a Hash with :queued and :reason (plus job details on success, or
  # error details when something raised). Never raises StandardError.
  def enqueue_build_history_retry_if_needed!(account:, profile:, post:)
    post_metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
    stored_policy = post_metadata["comment_generation_policy"]
    policy = stored_policy.is_a?(Hash) ? stored_policy.deep_dup : {}
    stored_retry_state = policy["retry_state"]
    retry_state = stored_retry_state.is_a?(Hash) ? stored_retry_state.deep_dup : {}

    prior_attempts = retry_state["attempts"].to_i
    if prior_attempts >= COMMENT_RETRY_MAX_ATTEMPTS
      return { queued: false, reason: "retry_attempts_exhausted" }
    end

    history_reason_code = policy["history_reason_code"].to_s
    unless PROFILE_INCOMPLETE_REASON_CODES.include?(history_reason_code)
      return { queued: false, reason: "history_reason_not_retryable" }
    end

    # The resumed run skips the media analysis steps and only redoes metadata
    # tagging and comment generation.
    resume_task_flags = {
      analyze_visual: false,
      analyze_faces: false,
      run_ocr: false,
      run_video: false,
      run_metadata: true,
      generate_comments: true,
      enforce_comment_evidence_policy: true,
      retry_on_incomplete_profile: true
    }
    resume_kwargs = {
      instagram_account_id: account.id,
      instagram_profile_id: profile.id,
      instagram_profile_post_id: post.id,
      pipeline_mode: "inline",
      task_flags: resume_task_flags
    }
    history_result = BuildInstagramProfileHistoryJob.enqueue_with_resume_if_needed!(
      account: account,
      profile: profile,
      trigger_source: "post_inline_comment_fallback",
      requested_by: self.class.name,
      resume_job: { job_class: self.class, job_kwargs: resume_kwargs }
    )

    accepted = ActiveModel::Type::Boolean.new.cast(history_result[:accepted])
    return { queued: false, reason: history_result[:reason] } unless accepted

    retry_state.merge!(
      "attempts" => prior_attempts + 1,
      "last_reason_code" => history_reason_code,
      "last_blocked_at" => Time.current.iso8601(3),
      "last_enqueued_at" => Time.current.iso8601(3),
      "next_run_at" => history_result[:next_run_at].to_s.presence,
      "job_id" => history_result[:job_id].to_s.presence
    )
    if history_result[:action_log_id].present?
      retry_state["build_history_action_log_id"] = history_result[:action_log_id].to_i
    end
    retry_state.merge!(
      "source" => self.class.name,
      "mode" => "build_history_fallback"
    )

    policy["retry_state"] = retry_state
    policy["updated_at"] = Time.current.iso8601(3)
    post_metadata["comment_generation_policy"] = policy
    post.update!(metadata: post_metadata)

    {
      queued: true,
      reason: "build_history_fallback_registered",
      job_id: history_result[:job_id].to_s,
      action_log_id: history_result[:action_log_id],
      next_run_at: history_result[:next_run_at].to_s
    }
  rescue StandardError => e
    { queued: false, reason: "retry_enqueue_failed", error_class: e.class.name, error_message: e.message.to_s }
  end
  363. end

app/jobs/append_profile_history_narrative_job.rb

0.0% lines covered

100.0% branches covered

17 relevant lines. 0 lines covered and 17 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Appends an entry to a profile's history narrative for a given
  # InstagramProfileEvent, on the :maintenance queue.
  class AppendProfileHistoryNarrativeJob < ApplicationJob
    queue_as :maintenance

    # instagram_profile_event_id - id of the event; the job is a no-op when no
    #                              such event exists.
    # mode         - "event" or "story_intelligence"; any other value does
    #                nothing.
    # intelligence - Hash used in "story_intelligence" mode (keys are
    #                symbolized); non-Hash values are replaced by {}.
    #
    # Failures are logged as warnings and swallowed (returns nil).
    def perform(instagram_profile_event_id:, mode: "event", intelligence: nil)
      event = InstagramProfileEvent.find_by(id: instagram_profile_event_id)
      return unless event

      requested_mode = mode.to_s
      if requested_mode == "event"
        Ai::ProfileHistoryNarrativeBuilder.append_event!(event)
      elsif requested_mode == "story_intelligence"
        symbolized = intelligence.is_a?(Hash) ? intelligence.deep_symbolize_keys : {}
        Ai::ProfileHistoryNarrativeBuilder.append_story_intelligence!(event, intelligence: symbolized)
      end
    rescue StandardError => e
      Rails.logger.warn("[AppendProfileHistoryNarrativeJob] failed for event_id=#{instagram_profile_event_id}: #{e.class}: #{e.message}")
      nil
    end
  end

app/jobs/application_job.rb

0.0% lines covered

100.0% branches covered

210 relevant lines. 0 lines covered and 210 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "json"
  # Base class for the application's background jobs.
  #
  # On top of ActiveJob::Base it adds:
  # * discarding of jobs that raise Instagram::AuthenticationRequiredError,
  #   with a warning log line and a "jobs_changed" live update;
  # * an around_perform wrapper that logs and broadcasts job start and
  #   completion, and on failure persists a BackgroundJobFailure row, reports
  #   to Ops::IssueTracker, logs/broadcasts the failure and re-raises.
  class ApplicationJob < ActiveJob::Base
    # Automatically retry jobs that encountered a deadlock
    # retry_on ActiveRecord::Deadlocked
    # Most jobs are safe to ignore if the underlying records are no longer available
    # discard_on ActiveJob::DeserializationError
    discard_on Instagram::AuthenticationRequiredError do |job, error|
      context = Jobs::ContextExtractor.from_active_job_arguments(job.arguments)
      Rails.logger.warn(
        "[jobs.auth_required] #{job.class.name} discarded: #{error.message} " \
        "(account_id=#{context[:instagram_account_id] || '-'}, profile_id=#{context[:instagram_profile_id] || '-'})"
      )
      Ops::LiveUpdateBroadcaster.broadcast!(
        topic: "jobs_changed",
        account_id: context[:instagram_account_id],
        payload: {
          status: "discarded",
          reason: "authentication_required",
          job_class: job.class.name,
          instagram_account_id: context[:instagram_account_id],
          instagram_profile_id: context[:instagram_profile_id],
          instagram_profile_post_id: context[:instagram_profile_post_id]
        },
        throttle_key: "jobs_changed"
      )
    end

    around_perform do |job, block|
      context = Jobs::ContextExtractor.from_active_job_arguments(job.arguments)
      started_at = Time.current
      # Monotonic clock for the success-path duration; nil if unavailable.
      started_monotonic = Process.clock_gettime(Process::CLOCK_MONOTONIC) rescue nil
      Current.set(
        active_job_id: job.job_id,
        provider_job_id: job.provider_job_id,
        job_class: job.class.name,
        queue_name: job.queue_name,
        instagram_account_id: context[:instagram_account_id],
        instagram_profile_id: context[:instagram_profile_id]
      ) do
        Ai::ApiUsageTracker.with_context(
          active_job_id: job.job_id,
          provider_job_id: job.provider_job_id,
          job_class: job.class.name,
          queue_name: job.queue_name,
          instagram_account_id: context[:instagram_account_id],
          instagram_profile_id: context[:instagram_profile_id]
        ) do
          Ops::StructuredLogger.info(
            event: "job.started",
            payload: {
              active_job_id: job.job_id,
              job_class: job.class.name,
              queue_name: job.queue_name,
              instagram_account_id: context[:instagram_account_id],
              instagram_profile_id: context[:instagram_profile_id]
            }
          )
          Ops::LiveUpdateBroadcaster.broadcast!(
            topic: "jobs_changed",
            account_id: context[:instagram_account_id],
            payload: {
              status: "started",
              job_class: job.class.name,
              active_job_id: job.job_id,
              instagram_account_id: context[:instagram_account_id],
              instagram_profile_id: context[:instagram_profile_id],
              instagram_profile_post_id: context[:instagram_profile_post_id]
            },
            throttle_key: "jobs_changed"
          )
          block.call
          duration_ms =
            if started_monotonic
              ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_monotonic) * 1000).round
            end
          Ops::StructuredLogger.info(
            event: "job.completed",
            payload: {
              active_job_id: job.job_id,
              job_class: job.class.name,
              queue_name: job.queue_name,
              instagram_account_id: context[:instagram_account_id],
              instagram_profile_id: context[:instagram_profile_id],
              duration_ms: duration_ms
            }
          )
          Ops::LiveUpdateBroadcaster.broadcast!(
            topic: "jobs_changed",
            account_id: context[:instagram_account_id],
            payload: {
              status: "completed",
              job_class: job.class.name,
              active_job_id: job.job_id,
              instagram_account_id: context[:instagram_account_id],
              instagram_profile_id: context[:instagram_profile_id],
              instagram_profile_post_id: context[:instagram_profile_post_id]
            },
            throttle_key: "jobs_changed"
          )
        end
      end
    rescue StandardError => e
      begin
        # The error may have been raised before `context` / `started_at` were
        # assigned (e.g. while extracting the context). Use safe defaults so
        # the reporting below still runs instead of raising on nil and being
        # swallowed by the inner rescue.
        context ||= {}
        failure_duration_ms = started_at ? ((Time.current - started_at) * 1000).round : nil

        queue_adapter = Rails.application.config.active_job.queue_adapter.to_s
        # Best-effort lookup of the backing SolidQueue row id.
        solid_id =
          begin
            if queue_adapter == "solid_queue"
              SolidQueue::Job.find_by(active_job_id: job.job_id)&.id
            end
          rescue StandardError
            nil
          end
        failure = BackgroundJobFailure.create!(
          active_job_id: job.job_id,
          queue_name: job.queue_name,
          job_class: job.class.name,
          arguments_json: job.send(:safe_json, job.arguments),
          provider_job_id: job.provider_job_id,
          solid_queue_job_id: solid_id,
          instagram_account_id: context[:instagram_account_id],
          instagram_profile_id: context[:instagram_profile_id],
          error_class: e.class.name,
          error_message: e.message.to_s,
          backtrace: Array(e.backtrace).join("\n"),
          failure_kind: job.send(:failure_kind_for, e),
          retryable: job.send(:retryable_for, e),
          occurred_at: Time.current,
          metadata: {
            queue_backend: queue_adapter,
            instagram_account_id: context[:instagram_account_id],
            instagram_profile_id: context[:instagram_profile_id],
            job_scope: context[:job_scope],
            context_label: context[:context_label],
            started_at: started_at&.iso8601,
            failed_at: Time.current.iso8601,
            duration_ms: failure_duration_ms,
            locale: job.locale,
            timezone: job.timezone,
            executions: job.executions,
            exception_executions: job.exception_executions
          }
        )
        Ops::IssueTracker.record_job_failure!(
          job: job,
          exception: e,
          context: context,
          failure_record: failure
        )
        Ops::StructuredLogger.error(
          event: "job.failed",
          payload: {
            active_job_id: job.job_id,
            job_class: job.class.name,
            queue_name: job.queue_name,
            instagram_account_id: context[:instagram_account_id],
            instagram_profile_id: context[:instagram_profile_id],
            error_class: e.class.name,
            error_message: e.message,
            failure_kind: failure.failure_kind,
            retryable: failure.retryable?
          }
        )
        Ops::LiveUpdateBroadcaster.broadcast!(
          topic: "jobs_changed",
          account_id: context[:instagram_account_id],
          payload: {
            status: "failed",
            job_class: job.class.name,
            active_job_id: job.job_id,
            failure_kind: failure.failure_kind,
            instagram_account_id: context[:instagram_account_id],
            instagram_profile_id: context[:instagram_profile_id],
            instagram_profile_post_id: context[:instagram_profile_post_id]
          },
          throttle_key: "jobs_changed"
        )
      rescue StandardError
        # Never let failure logging take down job execution error reporting.
        nil
      end
      # Re-raise the original job error after reporting.
      raise
    end

    private

    # Serializes +value+ to JSON, falling back to a fixed error document when
    # it cannot be serialized.
    def safe_json(value)
      JSON.generate(value)
    rescue StandardError
      JSON.generate({ error: "unable_to_serialize_arguments" })
    end

    # Classifies +error+ as "authentication", "transient" or "runtime".
    def failure_kind_for(error)
      return "authentication" if authentication_error?(error)
      return "transient" if transient_error?(error)

      "runtime"
    end

    # Every failure except an authentication failure is flagged retryable.
    def retryable_for(error)
      !authentication_error?(error)
    end

    # True when +error+ is an instance of one of the listed timeout/connection
    # error classes. Classes that are not loaded are skipped.
    def transient_error?(error)
      classes = [
        "Net::OpenTimeout",
        "Net::ReadTimeout",
        "Errno::ECONNRESET",
        "Errno::ECONNREFUSED",
        "Selenium::WebDriver::Error::TimeoutError"
      ].filter_map(&:safe_constantize)
      classes.any? { |klass| error.is_a?(klass) }
    rescue StandardError
      false
    end

    # True for Instagram::AuthenticationRequiredError, or for any error whose
    # message (case-insensitively) contains one of the known
    # authentication-failure phrases.
    def authentication_error?(error)
      return true if error.is_a?(Instagram::AuthenticationRequiredError)

      msg = error.message.to_s.downcase
      msg.include?("stored cookies are not authenticated") ||
        msg.include?("authentication required") ||
        msg.include?("no stored cookies")
    end
  end

app/jobs/auto_engage_home_feed_job.rb

0.0% lines covered

100.0% branches covered

29 relevant lines. 0 lines covered and 29 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Runs an automatic home-feed engagement pass for one Instagram account on
  # the :engagements queue and notifies the account's Turbo stream with the
  # outcome.
  class AutoEngageHomeFeedJob < ApplicationJob
    queue_as :engagements

    # instagram_account_id - id of the InstagramAccount to act as.
    # max_posts, include_story, story_hold_seconds - forwarded unchanged to
    #   Instagram::Client#auto_engage_home_feed!.
    #
    # On failure an alert is broadcast (when the account can be loaded) and
    # the error is re-raised.
    def perform(instagram_account_id:, max_posts: 3, include_story: true, story_hold_seconds: 18)
      account = InstagramAccount.find(instagram_account_id)
      client = Instagram::Client.new(account: account)
      result = client.auto_engage_home_feed!(
        max_posts: max_posts,
        include_story: include_story,
        story_hold_seconds: story_hold_seconds
      )
      summary = "Auto engagement completed for #{account.username}: posts_commented=#{result[:posts_commented]}, story_replied=#{result[:story_replied]}."
      Turbo::StreamsChannel.broadcast_append_to(
        account,
        target: "notifications",
        partial: "shared/notification",
        locals: { kind: "notice", message: summary }
      )
    rescue StandardError => e
      # `account` is nil here when the initial find itself failed.
      account ||= InstagramAccount.where(id: instagram_account_id).first
      if account
        Turbo::StreamsChannel.broadcast_append_to(
          account,
          target: "notifications",
          partial: "shared/notification",
          locals: { kind: "alert", message: "Auto engagement failed: #{e.message}" }
        )
      end
      raise
    end
  end

app/jobs/build_instagram_profile_history_job.rb

0.0% lines covered

100.0% branches covered

380 relevant lines. 0 lines covered and 380 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "digest"
  2. require "json"
  3. require "set"
  4. class BuildInstagramProfileHistoryJob < ApplicationJob
  5. queue_as :ai
  6. PROFILE_INCOMPLETE_REASON_CODES =
  7. if defined?(ProcessPostMetadataTaggingJob::PROFILE_INCOMPLETE_REASON_CODES)
  8. ProcessPostMetadataTaggingJob::PROFILE_INCOMPLETE_REASON_CODES
  9. else
  10. %w[
  11. latest_posts_not_analyzed
  12. insufficient_analyzed_posts
  13. no_recent_posts_available
  14. missing_structured_post_signals
  15. profile_preparation_failed
  16. profile_preparation_error
  17. ].freeze
  18. end
  19. MAX_RETRY_ATTEMPTS = ENV.fetch("PROFILE_HISTORY_BUILD_MAX_RETRY_ATTEMPTS", 8).to_i.clamp(1, 30)
  20. SHORT_RETRY_WAIT_MINUTES = ENV.fetch("PROFILE_HISTORY_BUILD_RETRY_WAIT_MINUTES", 45).to_i.clamp(10, 240)
  21. FACE_REFRESH_RETRY_WAIT_MINUTES = ENV.fetch("PROFILE_HISTORY_BUILD_FACE_REFRESH_RETRY_WAIT_MINUTES", 15).to_i.clamp(5, 120)
  22. LONG_RETRY_WAIT_HOURS = ENV.fetch("PROFILE_HISTORY_BUILD_RETRY_WAIT_HOURS", 4).to_i.clamp(1, 24)
  23. ACTIVE_LOG_LOOKBACK_HOURS = ENV.fetch("PROFILE_HISTORY_BUILD_ACTIVE_LOG_LOOKBACK_HOURS", 12).to_i.clamp(1, 72)
  24. class << self
  25. def enqueue_with_resume_if_needed!(account:, profile:, trigger_source:, requested_by:, resume_job: nil)
  26. raise ArgumentError, "account is required" unless account
  27. raise ArgumentError, "profile is required" unless profile
  28. serialized_resume = serialize_resume_job(resume_job)
  29. active_log = active_build_history_log(profile: profile)
  30. if active_log
  31. register_pending_resume_jobs!(log: active_log, jobs: [ serialized_resume ].compact, requested_by: requested_by)
  32. return {
  33. accepted: true,
  34. queued: false,
  35. registered: serialized_resume.present?,
  36. reason: "build_history_already_running",
  37. action_log_id: active_log.id,
  38. job_id: active_log.active_job_id,
  39. next_run_at: active_log.metadata.is_a?(Hash) ? active_log.metadata.dig("retry", "next_run_at") : nil
  40. }
  41. end
  42. metadata = {
  43. requested_by: requested_by.to_s.presence || name,
  44. trigger_source: trigger_source.to_s.presence || "system"
  45. }
  46. metadata["pending_resume_jobs"] = [ serialized_resume ] if serialized_resume
  47. log = profile.instagram_profile_action_logs.create!(
  48. instagram_account: account,
  49. action: "build_history",
  50. status: "queued",
  51. trigger_source: trigger_source.to_s.presence || "system",
  52. occurred_at: Time.current,
  53. metadata: metadata
  54. )
  55. job = perform_later(
  56. instagram_account_id: account.id,
  57. instagram_profile_id: profile.id,
  58. profile_action_log_id: log.id
  59. )
  60. log.update!(active_job_id: job.job_id, queue_name: job.queue_name)
  61. {
  62. accepted: true,
  63. queued: true,
  64. registered: serialized_resume.present?,
  65. reason: "build_history_queued",
  66. action_log_id: log.id,
  67. job_id: job.job_id,
  68. next_run_at: nil
  69. }
  70. rescue StandardError => e
  71. {
  72. accepted: false,
  73. queued: false,
  74. registered: false,
  75. reason: "build_history_enqueue_failed",
  76. error_class: e.class.name,
  77. error_message: e.message.to_s
  78. }
  79. end
  80. def serialize_resume_job(resume_job)
  81. return nil unless resume_job.is_a?(Hash)
  82. raw_job_class = resume_job[:job_class] || resume_job["job_class"]
  83. raw_kwargs = resume_job[:job_kwargs] || resume_job["job_kwargs"]
  84. job_class_name =
  85. case raw_job_class
  86. when Class
  87. raw_job_class.name
  88. else
  89. raw_job_class.to_s
  90. end
  91. return nil if job_class_name.blank?
  92. kwargs = raw_kwargs.is_a?(Hash) ? raw_kwargs.deep_stringify_keys : {}
  93. {
  94. "job_class" => job_class_name,
  95. "job_kwargs" => kwargs,
  96. "fingerprint" => resume_fingerprint(job_class_name: job_class_name, job_kwargs: kwargs),
  97. "registered_at" => Time.current.iso8601(3)
  98. }
  99. rescue StandardError
  100. nil
  101. end
  102. private
  103. def active_build_history_log(profile:)
  104. profile.instagram_profile_action_logs
  105. .where(action: "build_history", status: %w[queued running])
  106. .where("created_at >= ?", ACTIVE_LOG_LOOKBACK_HOURS.hours.ago)
  107. .order(created_at: :desc)
  108. .first
  109. end
  110. def register_pending_resume_jobs!(log:, jobs:, requested_by:)
  111. valid_jobs = Array(jobs).select { |row| row.is_a?(Hash) }
  112. return if valid_jobs.empty?
  113. log.with_lock do
  114. metadata = log.metadata.is_a?(Hash) ? log.metadata.deep_dup : {}
  115. pending = Array(metadata["pending_resume_jobs"]).select { |row| row.is_a?(Hash) }
  116. existing_fingerprints = pending.map { |row| row["fingerprint"].to_s }.reject(&:blank?).to_set
  117. valid_jobs.each do |row|
  118. fingerprint = row["fingerprint"].to_s
  119. next if fingerprint.present? && existing_fingerprints.include?(fingerprint)
  120. pending << row
  121. existing_fingerprints << fingerprint if fingerprint.present?
  122. end
  123. metadata["pending_resume_jobs"] = pending
  124. metadata["last_resume_registration_at"] = Time.current.iso8601(3)
  125. metadata["requested_by"] = requested_by.to_s if requested_by.to_s.present?
  126. log.update!(metadata: metadata)
  127. end
  128. rescue StandardError
  129. nil
  130. end
  131. def resume_fingerprint(job_class_name:, job_kwargs:)
  132. normalized = normalize_for_fingerprint(job_kwargs)
  133. Digest::SHA256.hexdigest("#{job_class_name}:#{JSON.generate(normalized)}")
  134. rescue StandardError
  135. Digest::SHA256.hexdigest("#{job_class_name}:#{job_kwargs}")
  136. end
  137. def normalize_for_fingerprint(value)
  138. case value
  139. when Hash
  140. value.keys.map(&:to_s).sort.each_with_object({}) do |key, hash|
  141. hash[key] = normalize_for_fingerprint(value[key] || value[key.to_sym])
  142. end
  143. when Array
  144. value.map { |row| normalize_for_fingerprint(row) }
  145. else
  146. value
  147. end
  148. end
  149. end
  150. def perform(instagram_account_id:, instagram_profile_id:, profile_action_log_id: nil, attempts: 0, resume_job: nil)
  151. account = InstagramAccount.find(instagram_account_id)
  152. profile = account.instagram_profiles.find(instagram_profile_id)
  153. action_log = find_or_create_action_log(
  154. account: account,
  155. profile: profile,
  156. profile_action_log_id: profile_action_log_id
  157. )
  158. register_incoming_resume_job!(action_log: action_log, resume_job: resume_job)
  159. action_log.mark_running!(extra_metadata: {
  160. queue_name: queue_name,
  161. active_job_id: job_id,
  162. attempts: attempts.to_i
  163. })
  164. result = Ai::ProfileHistoryBuildService.new(account: account, profile: profile).execute!
  165. history_state = result[:history_state].is_a?(Hash) ? result[:history_state] : {}
  166. reason_code = result[:reason_code].to_s
  167. reason = result[:reason].to_s
  168. status = result[:status].to_s
  169. payload = {
  170. attempts: attempts.to_i,
  171. status: status,
  172. reason_code: reason_code.presence,
  173. reason: reason.presence,
  174. history_build: history_state
  175. }.compact
  176. case status
  177. when "ready"
  178. resume_state = enqueue_pending_resume_jobs!(action_log: action_log, resume_job: resume_job)
  179. action_log.mark_succeeded!(
  180. extra_metadata: payload.merge(
  181. resume: resume_state
  182. ),
  183. log_text: "History Ready for #{profile.username}."
  184. )
  185. when "blocked"
  186. action_log.mark_succeeded!(
  187. extra_metadata: payload.merge(skipped: true),
  188. log_text: reason.presence || "History build skipped by policy."
  189. )
  190. else
  191. retry_state = schedule_retry!(
  192. account: account,
  193. profile: profile,
  194. action_log: action_log,
  195. attempts: attempts.to_i,
  196. reason_code: reason_code
  197. )
  198. if retry_state[:queued]
  199. queue_payload = payload.merge(
  200. retry: {
  201. queued: true,
  202. next_run_at: retry_state[:next_run_at].iso8601(3),
  203. retry_job_id: retry_state[:job_id],
  204. wait_seconds: retry_state[:wait_seconds]
  205. }
  206. )
  207. action_log.update!(
  208. status: "queued",
  209. finished_at: nil,
  210. metadata: merge_metadata(action_log.metadata, queue_payload),
  211. error_message: nil,
  212. log_text: "History build pending (#{reason_code.presence || 'in_progress'}). Retry scheduled at #{retry_state[:next_run_at].in_time_zone.iso8601}."
  213. )
  214. else
  215. exhausted_payload = payload.merge(
  216. retry: retry_state.except(:queued)
  217. )
  218. action_log.mark_failed!(
  219. error_message: "History build pending and retry unavailable (#{reason_code.presence || retry_state[:reason]}).",
  220. extra_metadata: exhausted_payload
  221. )
  222. end
  223. end
  224. rescue StandardError => e
  225. action_log&.mark_failed!(
  226. error_message: e.message,
  227. extra_metadata: {
  228. active_job_id: job_id,
  229. attempts: attempts.to_i
  230. }
  231. )
  232. raise
  233. end
  234. private
  235. def find_or_create_action_log(account:, profile:, profile_action_log_id:)
  236. log = profile.instagram_profile_action_logs.find_by(id: profile_action_log_id) if profile_action_log_id.present?
  237. return log if log
  238. profile.instagram_profile_action_logs.create!(
  239. instagram_account: account,
  240. action: "build_history",
  241. status: "queued",
  242. trigger_source: "job",
  243. occurred_at: Time.current,
  244. active_job_id: job_id,
  245. queue_name: queue_name,
  246. metadata: { created_by: self.class.name }
  247. )
  248. end
  249. def schedule_retry!(account:, profile:, action_log:, attempts:, reason_code:)
  250. return { queued: false, reason: "max_attempts_reached" } if attempts >= MAX_RETRY_ATTEMPTS
  251. wait_seconds = retry_wait_seconds_for(reason_code: reason_code)
  252. run_at = Time.current + wait_seconds.seconds
  253. job = self.class.set(wait_until: run_at).perform_later(
  254. instagram_account_id: account.id,
  255. instagram_profile_id: profile.id,
  256. profile_action_log_id: action_log.id,
  257. attempts: attempts + 1
  258. )
  259. {
  260. queued: true,
  261. wait_seconds: wait_seconds,
  262. next_run_at: run_at,
  263. job_id: job.job_id
  264. }
  265. rescue StandardError => e
  266. {
  267. queued: false,
  268. reason: "retry_enqueue_failed",
  269. error_class: e.class.name,
  270. error_message: e.message.to_s
  271. }
  272. end
  273. def retry_wait_seconds_for(reason_code:)
  274. code = reason_code.to_s
  275. if code == "waiting_for_face_refresh"
  276. FACE_REFRESH_RETRY_WAIT_MINUTES.minutes.to_i
  277. elsif PROFILE_INCOMPLETE_REASON_CODES.include?(code)
  278. LONG_RETRY_WAIT_HOURS.hours.to_i
  279. else
  280. SHORT_RETRY_WAIT_MINUTES.minutes.to_i
  281. end
  282. end
  283. def merge_metadata(base, extra)
  284. current = base.is_a?(Hash) ? base : {}
  285. current.merge(extra.to_h)
  286. end
  287. def register_incoming_resume_job!(action_log:, resume_job:)
  288. serialized = self.class.send(:serialize_resume_job, resume_job)
  289. return unless serialized
  290. self.class.send(
  291. :register_pending_resume_jobs!,
  292. log: action_log,
  293. jobs: [ serialized ],
  294. requested_by: action_log.metadata.is_a?(Hash) ? action_log.metadata["requested_by"] : nil
  295. )
  296. rescue StandardError
  297. nil
  298. end
  299. def enqueue_pending_resume_jobs!(action_log:, resume_job:)
  300. additional = self.class.send(:serialize_resume_job, resume_job)
  301. pending = Array(action_log.metadata.is_a?(Hash) ? action_log.metadata["pending_resume_jobs"] : nil)
  302. pending = pending.select { |row| row.is_a?(Hash) }
  303. pending << additional if additional
  304. pending = dedupe_resume_jobs(rows: pending)
  305. return { pending_count: 0, resumed_count: 0, failed_count: 0, failures: [] } if pending.empty?
  306. resumed = []
  307. failures = []
  308. still_pending = []
  309. pending.each do |row|
  310. job_class_name = row["job_class"].to_s
  311. job_class = job_class_name.safe_constantize
  312. unless job_class.respond_to?(:perform_later)
  313. failure = row.merge(
  314. "error_class" => "UnresumableJobClass",
  315. "error_message" => "Job class not found or not resumable: #{job_class_name}",
  316. "failed_at" => Time.current.iso8601(3)
  317. )
  318. failures << failure
  319. still_pending << row
  320. next
  321. end
  322. kwargs = row["job_kwargs"].is_a?(Hash) ? row["job_kwargs"].deep_symbolize_keys : {}
  323. job = job_class.perform_later(**kwargs)
  324. resumed << row.merge(
  325. "resumed_job_id" => job.job_id,
  326. "resumed_queue_name" => job.queue_name,
  327. "resumed_at" => Time.current.iso8601(3)
  328. )
  329. rescue StandardError => e
  330. failure = row.merge(
  331. "error_class" => e.class.name,
  332. "error_message" => e.message.to_s,
  333. "failed_at" => Time.current.iso8601(3)
  334. )
  335. failures << failure
  336. still_pending << row
  337. end
  338. action_log.with_lock do
  339. metadata = action_log.metadata.is_a?(Hash) ? action_log.metadata.deep_dup : {}
  340. existing_resumed = Array(metadata["resumed_jobs"]).select { |row| row.is_a?(Hash) }
  341. metadata["resumed_jobs"] = (existing_resumed + resumed).last(60)
  342. metadata["pending_resume_jobs"] = still_pending
  343. metadata["resume_failures"] = failures.first(20) if failures.any?
  344. metadata["last_resume_attempt_at"] = Time.current.iso8601(3)
  345. action_log.update!(metadata: metadata)
  346. end
  347. {
  348. pending_count: pending.length,
  349. resumed_count: resumed.length,
  350. failed_count: failures.length,
  351. failures: failures.first(20),
  352. resumed_job_ids: resumed.map { |row| row["resumed_job_id"] }.compact.first(30)
  353. }
  354. rescue StandardError => e
  355. {
  356. pending_count: 0,
  357. resumed_count: 0,
  358. failed_count: 1,
  359. failures: [
  360. {
  361. "error_class" => e.class.name,
  362. "error_message" => e.message.to_s
  363. }
  364. ]
  365. }
  366. end
  # Removes duplicate resume-job rows, keeping the first occurrence of each fingerprint.
  #
  # Non-Hash entries are dropped. A row without a usable "fingerprint" gets one
  # derived from a SHA-256 digest of its "job_class" and "job_kwargs" values.
  # Input rows are not mutated; every returned row is a copy that carries its
  # "fingerprint" key.
  #
  # @param rows [Array<Hash>, nil] candidate rows (anything Array() can coerce)
  # @return [Array<Hash>] unique rows in their original relative order
  def dedupe_resume_jobs(rows:)
    known_fingerprints = Set.new
    unique_rows = []

    Array(rows).each do |candidate|
      next unless candidate.is_a?(Hash)

      key = candidate["fingerprint"].to_s
      key = Digest::SHA256.hexdigest("#{candidate['job_class']}:#{candidate['job_kwargs']}") if key.blank?

      # Set#add? returns nil when the key is already present, i.e. a duplicate row.
      next unless known_fingerprints.add?(key)

      unique_rows << candidate.merge("fingerprint" => key)
    end

    unique_rows
  end
  380. end

app/jobs/capture_home_feed_job.rb

0.0% lines covered

100.0% branches covered

23 relevant lines. 0 lines covered and 23 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Captures home-feed posts for one Instagram account through
  # Instagram::Client#capture_home_feed_posts! and appends a notification to the
  # account's "notifications" Turbo stream target with the outcome.
  class CaptureHomeFeedJob < ApplicationJob
    queue_as :sync

    # @param instagram_account_id [Integer] id of the InstagramAccount to capture for
    # @param rounds [Integer] forwarded to the client unchanged
    # @param delay_seconds [Integer] forwarded to the client unchanged
    # @param max_new [Integer] forwarded to the client unchanged
    # @raise [StandardError] any failure is re-raised after the alert broadcast
    def perform(instagram_account_id:, rounds: 4, delay_seconds: 45, max_new: 20)
      account = InstagramAccount.find(instagram_account_id)
      capture = Instagram::Client
        .new(account: account)
        .capture_home_feed_posts!(rounds: rounds, delay_seconds: delay_seconds, max_new: max_new)

      push_notification(
        account,
        kind: "notice",
        message: "Feed capture completed for #{account.username}: new=#{capture[:new_posts]}, seen=#{capture[:seen_posts]}."
      )
    rescue StandardError => e
      # `account` is still nil when the initial lookup itself failed; retry with a
      # non-raising query so the alert can be delivered when the record exists.
      account ||= InstagramAccount.where(id: instagram_account_id).first
      push_notification(account, kind: "alert", message: "Feed capture failed: #{e.message}") if account
      raise
    end

    private

    # Appends the shared notification partial to the account's stream.
    def push_notification(account, kind:, message:)
      Turbo::StreamsChannel.broadcast_append_to(
        account,
        target: "notifications",
        partial: "shared/notification",
        locals: { kind: kind, message: message }
      )
    end
  end

app/jobs/capture_instagram_profile_posts_job.rb

0.0% lines covered

100.0% branches covered

332 relevant lines. 0 lines covered and 332 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "stringio"
  2. require "net/http"
  # Captures a profile's posts through Instagram::ProfileAnalysisCollector,
  # records capture/restore/delete events, queues media downloads for the most
  # recent downloadable posts and tracks progress on a profile action log.
  class CaptureInstagramProfilePostsJob < ApplicationJob
    queue_as :post_downloads

    # How many of the most recent downloadable posts should have media attached.
    DOWNLOAD_TARGET_RECENT_POSTS = 50
    # Post limit passed to the collector and used when building the download plan.
    CAPTURE_FETCH_LIMIT = 120
    # Policy reason codes that additionally mark the profile as scan-excluded.
    SCAN_EXCLUSION_REASON_CODES = %w[non_personal_profile_page scan_excluded_tag].freeze

    retry_on Net::OpenTimeout, Net::ReadTimeout, wait: :polynomially_longer, attempts: 4
    retry_on Errno::ECONNRESET, Errno::ECONNREFUSED, wait: :polynomially_longer, attempts: 4
    retry_on Timeout::Error, wait: :polynomially_longer, attempts: 3

    # @param instagram_account_id [Integer] owning account id
    # @param instagram_profile_id [Integer] profile id, looked up within the account
    # @param profile_action_log_id [Integer, nil] existing action log to reuse, if any
    # @param comments_limit [Integer] clamped to 1..30 before being passed on
    # @raise [StandardError] the original failure, after best-effort failure reporting
    def perform(instagram_account_id:, instagram_profile_id:, profile_action_log_id: nil, comments_limit: 20)
      account = InstagramAccount.find(instagram_account_id)
      profile = account.instagram_profiles.find(instagram_profile_id)
      comments_limit_i = comments_limit.to_i.clamp(1, 30)

      action_log = find_or_create_action_log(
        account: account,
        profile: profile,
        profile_action_log_id: profile_action_log_id
      )
      action_log.mark_running!(extra_metadata: {
        queue_name: queue_name,
        active_job_id: job_id,
        comments_limit: comments_limit_i
      })

      Ops::StructuredLogger.info(
        event: "profile_posts_capture.started",
        payload: {
          active_job_id: job_id,
          instagram_account_id: account.id,
          instagram_profile_id: profile.id,
          profile_username: profile.username,
          comments_limit: comments_limit_i
        }
      )

      policy_decision = Instagram::ProfileScanPolicy.new(profile: profile).decision
      if policy_decision[:skip_scan]
        record_policy_skip!(profile: profile, action_log: action_log, policy_decision: policy_decision)
        return
      end

      collected = Instagram::ProfileAnalysisCollector.new(account: account, profile: profile).collect_and_persist!(
        posts_limit: CAPTURE_FETCH_LIMIT,
        comments_limit: comments_limit_i,
        track_missing_as_deleted: true,
        sync_source: "profile_posts_manual_capture",
        download_media: false
      )
      persisted_posts = Array(collected[:posts])
      summary = collected[:summary].is_a?(Hash) ? collected[:summary] : {}
      created_shortcodes = Array(summary[:created_shortcodes])
      updated_shortcodes = Array(summary[:updated_shortcodes])
      restored_shortcodes = Array(summary[:restored_shortcodes])
      deleted_shortcodes = Array(summary[:deleted_shortcodes])

      event_counts = create_post_capture_events!(
        profile: profile,
        posts: persisted_posts,
        created_shortcodes: created_shortcodes,
        restored_shortcodes: restored_shortcodes,
        deleted_shortcodes: deleted_shortcodes
      )
      download_plan = build_download_plan(profile: profile)
      queued_downloads = enqueue_profile_post_downloads!(
        account: account,
        profile: profile,
        posts: download_plan[:to_queue]
      )
      profile.update!(last_synced_at: Time.current)

      # Counters shared by the structured log entry and the action-log metadata.
      capture_counts = {
        fetched_posts: persisted_posts.length,
        created_count: summary[:created_count].to_i,
        restored_count: summary[:restored_count].to_i,
        updated_count: summary[:updated_count].to_i,
        unchanged_count: summary[:unchanged_count].to_i,
        deleted_count: summary[:deleted_count].to_i
      }
      download_counts = {
        recent_download_target: DOWNLOAD_TARGET_RECENT_POSTS,
        recent_downloadable_posts: download_plan[:recent_candidates].length,
        recent_already_downloaded: download_plan[:already_downloaded_count],
        recent_missing_downloads: download_plan[:missing_count],
        queued_download_jobs: queued_downloads[:queued_count]
      }

      Ops::StructuredLogger.info(
        event: "profile_posts_capture.completed",
        payload: {
          active_job_id: job_id,
          instagram_account_id: account.id,
          instagram_profile_id: profile.id,
          profile_username: profile.username,
          **capture_counts,
          **download_counts,
          queue_failures: queued_downloads[:failures].length,
          captured_events_count: event_counts[:captured],
          deleted_events_count: event_counts[:deleted],
          restored_events_count: event_counts[:restored],
          downloadable_manifest_count: download_plan[:manifest].length
        }
      )

      Turbo::StreamsChannel.broadcast_append_to(
        account,
        target: "notifications",
        partial: "shared/notification",
        locals: {
          kind: "notice",
          message: "Post capture completed for #{profile.username}. New: #{capture_counts[:created_count]}, restored: #{capture_counts[:restored_count]}, deleted flagged: #{capture_counts[:deleted_count]}, queued downloads: #{queued_downloads[:queued_count]}, already downloaded in recent set: #{download_plan[:already_downloaded_count]}/#{DOWNLOAD_TARGET_RECENT_POSTS}."
        }
      )

      action_log.mark_succeeded!(
        extra_metadata: {
          **capture_counts,
          feed_fetch: summary[:feed_fetch].is_a?(Hash) ? summary[:feed_fetch] : {},
          created_shortcodes: created_shortcodes.first(40),
          updated_shortcodes: updated_shortcodes.first(40),
          restored_shortcodes: restored_shortcodes.first(40),
          deleted_shortcodes: deleted_shortcodes.first(40),
          **download_counts,
          queued_download_post_ids: queued_downloads[:post_ids].first(DOWNLOAD_TARGET_RECENT_POSTS),
          queue_failures: queued_downloads[:failures].first(20),
          download_manifest: download_plan[:manifest].first(DOWNLOAD_TARGET_RECENT_POSTS),
          captured_events_count: event_counts[:captured]
        },
        log_text: "Captured posts (new=#{capture_counts[:created_count]}, restored=#{capture_counts[:restored_count]}, updated=#{capture_counts[:updated_count]}, deleted=#{capture_counts[:deleted_count]}, queued_downloads=#{queued_downloads[:queued_count]}, already_downloaded_recent=#{download_plan[:already_downloaded_count]})."
      )
    rescue StandardError => e
      report_capture_failure(error: e, account: account, profile: profile, action_log: action_log)
      # Re-raise the ORIGINAL error so the retry_on rules above can still match it.
      raise
    end

    private

    # Marks the action log as succeeded-but-skipped and, for the exclusion reason
    # codes, flags the profile as scan-excluded.
    def record_policy_skip!(profile:, action_log:, policy_decision:)
      if SCAN_EXCLUSION_REASON_CODES.include?(policy_decision[:reason_code].to_s)
        Instagram::ProfileScanPolicy.mark_scan_excluded!(profile: profile)
      end

      action_log.mark_succeeded!(
        extra_metadata: {
          skipped: true,
          skip_reason_code: policy_decision[:reason_code],
          skip_reason: policy_decision[:reason],
          followers_count: policy_decision[:followers_count],
          max_followers: policy_decision[:max_followers]
        },
        log_text: "Skipped profile post capture: #{policy_decision[:reason]}"
      )
    end

    # Logs, notifies and marks the action log failed. Each step is isolated so a
    # failure in one of them cannot hide the original error or skip later steps.
    def report_capture_failure(error:, account:, profile:, action_log:)
      attempt_failure_step("structured failure log") do
        Ops::StructuredLogger.error(
          event: "profile_posts_capture.failed",
          payload: {
            active_job_id: job_id,
            instagram_account_id: account&.id,
            instagram_profile_id: profile&.id,
            error_class: error.class.name,
            error_message: error.message.to_s
          }
        )
      end

      attempt_failure_step("failure notification") do
        if account
          Turbo::StreamsChannel.broadcast_append_to(
            account,
            target: "notifications",
            partial: "shared/notification",
            locals: { kind: "alert", message: "Profile post capture failed: #{error.message}" }
          )
        end
      end

      attempt_failure_step("action log failure update") do
        action_log&.mark_failed!(error_message: error.message, extra_metadata: { active_job_id: job_id })
      end
    end

    # Runs a best-effort reporting step; secondary errors are only logged.
    def attempt_failure_step(step)
      yield
    rescue StandardError => e
      Rails.logger.warn("[CaptureInstagramProfilePostsJob] #{step} failed: #{e.class}: #{e.message}")
    end

    # Returns the action log identified by profile_action_log_id (scoped to the
    # profile) or creates a new "queued" log for this job.
    def find_or_create_action_log(account:, profile:, profile_action_log_id:)
      log = profile.instagram_profile_action_logs.find_by(id: profile_action_log_id) if profile_action_log_id.present?
      return log if log

      profile.instagram_profile_action_logs.create!(
        instagram_account: account,
        action: "capture_profile_posts",
        status: "queued",
        trigger_source: "job",
        occurred_at: Time.current,
        active_job_id: job_id,
        queue_name: queue_name,
        metadata: { created_by: self.class.name }
      )
    end

    # Records one profile event per created, restored and deleted shortcode.
    #
    # @return [Hash] counts of recorded events: { captured:, deleted:, restored: }
    def create_post_capture_events!(profile:, posts:, created_shortcodes:, restored_shortcodes:, deleted_shortcodes:)
      by_shortcode = posts.index_by { |post| post.shortcode.to_s }
      counts = { captured: 0, deleted: 0, restored: 0 }

      created_shortcodes.each do |shortcode|
        post = by_shortcode[shortcode.to_s] || profile.instagram_profile_posts.find_by(shortcode: shortcode.to_s)
        next unless post

        event = profile.record_event!(
          kind: "profile_post_captured",
          external_id: "profile_post_captured:#{post.shortcode}",
          occurred_at: post.taken_at || Time.current,
          metadata: profile_post_event_metadata(post: post, reason: "new_capture")
        )
        attach_post_media_to_event(event: event, post: post)
        counts[:captured] += 1
      end

      restored_shortcodes.each do |shortcode|
        post = by_shortcode[shortcode.to_s] || profile.instagram_profile_posts.find_by(shortcode: shortcode.to_s)
        next unless post

        # The timestamp suffix gives every restore detection its own external_id.
        profile.record_event!(
          kind: "profile_post_restored",
          external_id: "profile_post_restored:#{post.shortcode}:#{Time.current.utc.iso8601(6)}",
          occurred_at: Time.current,
          metadata: profile_post_event_metadata(post: post, reason: "restored_in_capture")
        )
        counts[:restored] += 1
      end

      deleted_shortcodes.each do |shortcode|
        # The event is recorded even when the post row cannot be found.
        post = profile.instagram_profile_posts.find_by(shortcode: shortcode.to_s)
        profile.record_event!(
          kind: "profile_post_deleted_detected",
          external_id: "profile_post_deleted_detected:#{shortcode}:#{Time.current.utc.iso8601(6)}",
          occurred_at: Time.current,
          metadata: {
            source: "profile_posts_manual_capture",
            shortcode: shortcode,
            instagram_profile_post_id: post&.id,
            deleted_from_source: true,
            preserved_in_history: true
          }
        )
        counts[:deleted] += 1
      end

      counts
    end

    # Inspects the most recent posts and decides which still need a media download.
    #
    # @return [Hash] :recent_candidates, :already_downloaded_count, :missing_count,
    #   :to_queue and :manifest (one descriptive hash per candidate)
    def build_download_plan(profile:)
      recent_candidates = profile.instagram_profile_posts
        .with_attached_media
        .recent_first
        .limit(CAPTURE_FETCH_LIMIT)
        .select { |post| downloadable_profile_post?(post) }
        .first(DOWNLOAD_TARGET_RECENT_POSTS)

      already_downloaded_count = recent_candidates.count { |post| post.media.attached? }
      missing_posts = recent_candidates.reject { |post| post.media.attached? }
      required = [DOWNLOAD_TARGET_RECENT_POSTS - already_downloaded_count, 0].max
      to_queue = missing_posts.first(required)

      manifest = recent_candidates.map do |post|
        metadata = post.metadata.is_a?(Hash) ? post.metadata : {}
        {
          post_id: post.id,
          shortcode: post.shortcode,
          post_kind: metadata["post_kind"].to_s.presence || "post",
          product_type: metadata["product_type"].to_s.presence,
          repost: ActiveModel::Type::Boolean.new.cast(metadata["is_repost"]),
          media_type: metadata["media_type"],
          media_id: metadata["media_id"],
          media_url: post.source_media_url.to_s.presence || metadata["media_url_video"].to_s.presence || metadata["media_url_image"].to_s.presence,
          taken_at: post.taken_at&.iso8601,
          downloaded: post.media.attached?
        }.compact
      end

      {
        recent_candidates: recent_candidates,
        already_downloaded_count: already_downloaded_count,
        missing_count: missing_posts.length,
        to_queue: to_queue,
        manifest: manifest
      }
    end

    # Enqueues a media download job per post and records a matching profile event.
    # Per-post errors are collected instead of aborting the whole batch.
    #
    # @return [Hash] { queued_count:, post_ids:, failures: }
    def enqueue_profile_post_downloads!(account:, profile:, posts:)
      post_ids = []
      failures = []

      Array(posts).each do |post|
        next unless post
        next unless downloadable_profile_post?(post)

        mark_download_queued!(post: post)
        job = DownloadInstagramProfilePostMediaJob.perform_later(
          instagram_account_id: account.id,
          instagram_profile_id: profile.id,
          instagram_profile_post_id: post.id,
          trigger_analysis: true
        )
        # perform_later returns false when the enqueue did not succeed; report that
        # as a failure instead of counting the post as queued.
        raise "download job was not enqueued" unless job

        post_ids << post.id
        profile.record_event!(
          kind: "profile_post_media_download_queued",
          external_id: "profile_post_media_download_queued:#{post.id}:#{job.job_id}",
          occurred_at: Time.current,
          metadata: {
            source: self.class.name,
            instagram_profile_post_id: post.id,
            shortcode: post.shortcode,
            active_job_id: job.job_id
          }
        )
      rescue StandardError => e
        failures << {
          instagram_profile_post_id: post&.id,
          shortcode: post&.shortcode.to_s.presence,
          error_class: e.class.name,
          error_message: e.message.to_s.byteslice(0, 220)
        }.compact
        next
      end

      {
        queued_count: post_ids.length,
        post_ids: post_ids,
        failures: failures
      }
    end

    # A post is downloadable when it is not flagged deleted_from_source and has a
    # source media URL or a video/image URL in its metadata.
    def downloadable_profile_post?(post)
      return false unless post
      return false if ActiveModel::Type::Boolean.new.cast(post.metadata.is_a?(Hash) ? post.metadata["deleted_from_source"] : nil)

      source_url = post.source_media_url.to_s.strip
      return true if source_url.present?

      metadata = post.metadata.is_a?(Hash) ? post.metadata : {}
      metadata["media_url_video"].to_s.strip.present? || metadata["media_url_image"].to_s.strip.present?
    end

    # Stamps the post metadata with a "queued" download status and clears any
    # previous download error.
    def mark_download_queued!(post:)
      metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
      post.update!(
        metadata: metadata.merge(
          "download_status" => "queued",
          "download_queued_at" => Time.current.utc.iso8601(3),
          "download_queued_by" => self.class.name,
          "download_error" => nil
        )
      )
    end

    # Builds the metadata hash stored on captured/restored post events.
    def profile_post_event_metadata(post:, reason:)
      metadata = post.metadata.is_a?(Hash) ? post.metadata : {}
      {
        source: "profile_posts_manual_capture",
        shortcode: post.shortcode,
        reason: reason.to_s,
        instagram_profile_post_id: post.id,
        permalink: post.permalink_url,
        likes_count: post.likes_count,
        comments_count: post.comments_count,
        media_type: metadata["media_type"],
        media_id: metadata["media_id"],
        deleted_from_source: false
      }
    end

    # Reuses the post's media blob on the event; failures are logged, never raised.
    def attach_post_media_to_event(event:, post:)
      return unless event
      return unless post.media.attached?
      return if event.media.attached?

      event.media.attach(post.media.blob)
    rescue StandardError => e
      Rails.logger.warn("[CaptureInstagramProfilePostsJob] unable to attach post media to event #{event&.id}: #{e.class}: #{e.message}")
    end
  end

app/jobs/check_ai_microservice_health_job.rb

0.0% lines covered

100.0% branches covered

27 relevant lines. 0 lines covered and 27 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "net/http"
  2. require "json"
  # Forces a local AI health check through Ops::LocalAiHealth and records the
  # outcome with Ops::IssueTracker.
  class CheckAiMicroserviceHealthJob < ApplicationJob
    queue_as :sync

    # @raise [StandardError] re-raised after a failed check has been recorded
    def perform
      health = Ops::LocalAiHealth.check(force: true)
      healthy = ActiveModel::Type::Boolean.new.cast(health[:ok])
      summary = healthy ? "Local AI stack healthy" : "Local AI stack unhealthy"

      Ops::IssueTracker.record_ai_service_check!(ok: healthy, message: summary, metadata: health)
    rescue StandardError => e
      # The check (or recording its result) blew up: record that as an unhealthy result.
      Ops::IssueTracker.record_ai_service_check!(
        ok: false,
        message: "AI microservice health check failed: #{e.message}",
        metadata: { error_class: e.class.name }
      )
      raise
    end
  end

app/jobs/check_queue_health_job.rb

0.0% lines covered

100.0% branches covered

6 relevant lines. 0 lines covered and 6 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Background job on the :sync queue whose only work is delegating to
  # Ops::QueueHealth.check!.
  class CheckQueueHealthJob < ApplicationJob
    queue_as :sync

    # Runs the queue health check; errors raised by the check propagate unchanged.
    def perform
      Ops::QueueHealth.check!
    end
  end

app/jobs/concerns/scheduled_account_batching.rb

0.0% lines covered

100.0% branches covered

37 relevant lines. 0 lines covered and 37 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Private helpers for jobs that walk InstagramAccount records in id-ordered
  # batches and re-enqueue themselves to continue with the next batch.
  module ScheduledAccountBatching
    extend ActiveSupport::Concern

    # Hard cap applied to any requested batch size.
    MAX_ACCOUNT_BATCH_SIZE = 200
    # Hard cap (seconds) applied to the delay before a continuation job.
    MAX_CONTINUATION_WAIT_SECONDS = 300

    private

    # Merges parameters with precedence: defaults < opts (when a Hash) < kwargs.
    # Keys of opts and kwargs are symbolized first.
    #
    # @return [Hash] the merged parameters
    def normalize_scheduler_params(opts, kwargs, defaults)
      positional = opts.is_a?(Hash) ? opts.symbolize_keys : {}
      explicit = kwargs.symbolize_keys
      defaults.merge(positional).merge(explicit)
    end

    # Loads the next id-ordered page of accounts after cursor_id.
    #
    # @param scope [ActiveRecord::Relation] base relation of accounts
    # @param cursor_id [Integer, nil] last id already processed; non-positive means start
    # @param batch_size [Integer] clamped to 1..MAX_ACCOUNT_BATCH_SIZE
    # @return [Hash] { accounts:, batch_size:, next_cursor_id:, has_more: }
    def load_account_batch(scope:, cursor_id:, batch_size:)
      id_column = InstagramAccount.arel_table[:id]
      page_size = batch_size.to_i.clamp(1, MAX_ACCOUNT_BATCH_SIZE)
      cursor = cursor_id.to_i

      page_scope = scope.reorder(id_column.asc)
      page_scope = page_scope.where(id_column.gt(cursor)) if cursor.positive?

      page = page_scope.limit(page_size).to_a
      last_id = page.last&.id
      # Probe the unordered base scope for any row beyond the last id of this page.
      more_remaining = last_id.present? && scope.where(id_column.gt(last_id.to_i)).exists?

      {
        accounts: page,
        batch_size: page_size,
        next_cursor_id: last_id,
        has_more: more_remaining
      }
    end

    # Re-enqueues the including job class with the nil-stripped payload as keyword
    # arguments, optionally delayed by up to MAX_CONTINUATION_WAIT_SECONDS.
    #
    # @return [Object, nil] the perform_later result, or nil when there is nothing to pass
    def schedule_account_batch_continuation!(wait_seconds:, payload:)
      return nil unless payload.is_a?(Hash)

      job_args = payload.compact
      return nil if job_args.empty?

      delay = wait_seconds.to_i.clamp(0, MAX_CONTINUATION_WAIT_SECONDS)
      enqueuer = delay.positive? ? self.class.set(wait: delay.seconds) : self.class
      enqueuer.perform_later(**job_args)
    end
  end

app/jobs/download_instagram_post_media_job.rb

0.0% lines covered

100.0% branches covered

147 relevant lines. 0 lines covered and 147 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "net/http"
  2. require "digest"
  # Downloads the media for an InstagramPost and attaches it via Active Storage,
  # reusing an already stored blob for the same shortcode when one exists.
  class DownloadInstagramPostMediaJob < ApplicationJob
    queue_as :post_downloads

    MAX_IMAGE_BYTES = 6 * 1024 * 1024
    MAX_VIDEO_BYTES = 80 * 1024 * 1024

    # @param instagram_post_id [Integer] id of the InstagramPost to fetch media for
    # @raise [StandardError] re-raised after the post's purge_at has been set
    def perform(instagram_post_id:)
      post = InstagramPost.find(instagram_post_id)

      # Nothing to do when the existing attachment passes the integrity check.
      if post.media.attached?
        integrity = blob_integrity_for(post.media.blob)
        return if integrity[:valid]
      end

      url = post.media_url.to_s.strip
      return if url.blank?
      return if attach_media_from_local_cache!(post: post)

      io, content_type, filename = download(url)
      blob = ActiveStorage::Blob.create_and_upload!(
        io: io,
        filename: filename,
        content_type: content_type,
        identify: false
      )
      attach_blob_to_post!(post: post, blob: blob)
      post.update!(media_downloaded_at: Time.current)
    rescue StandardError
      # `post` is nil when the lookup itself failed.
      post&.update!(purge_at: 6.hours.from_now)
      raise
    ensure
      begin
        io&.close
      rescue StandardError
        nil
      end
    end

    private

    # Fetches the media at +url+ and validates it.
    #
    # The body is read in chunks so the size limit is enforced while downloading
    # rather than after the whole response has been buffered in memory.
    #
    # @param url [String] absolute http(s) URL
    # @return [Array(StringIO, String, String)] io, content type, generated filename
    # @raise [RuntimeError] on invalid URL, non-2xx status, empty/oversized/HTML
    #   payloads or a signature that does not match the content type
    def download(url)
      uri = URI.parse(url)
      # URI::HTTPS is a subclass of URI::HTTP. A missing host must not reach Net::HTTP.
      raise "invalid media url" unless uri.is_a?(URI::HTTP) && uri.host.present?

      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = (uri.scheme == "https")
      http.open_timeout = 10
      http.read_timeout = 30

      req = Net::HTTP::Get.new(uri.request_uri)
      req["Accept"] = "*/*"
      req["User-Agent"] = "Mozilla/5.0"

      body = String.new # binary (ASCII-8BIT) buffer
      content_type = nil
      http.request(req) do |res|
        raise "media download failed: HTTP #{res.code}" unless res.is_a?(Net::HTTPSuccess)

        content_type = res["content-type"].to_s.split(";").first.presence || "application/octet-stream"
        limit = content_type.start_with?("video/") ? MAX_VIDEO_BYTES : MAX_IMAGE_BYTES
        raise "media too large" if res["content-length"].to_i > limit

        res.read_body do |chunk|
          body << chunk
          raise "media too large" if body.bytesize > limit
        end
      end

      raise "empty media payload" if body.bytesize <= 0
      raise "unexpected html payload" if html_payload?(body)

      validate_known_signature!(body: body, content_type: content_type)

      ext = extension_for_content_type(content_type)
      io = StringIO.new(body)
      io.set_encoding(Encoding::BINARY) if io.respond_to?(:set_encoding)
      [io, content_type, "post_#{Digest::SHA256.hexdigest(url)[0, 12]}.#{ext}"]
    end

    # Checks that a blob looks usable: positive byte size and, when the storage
    # service exposes path_for, an on-disk file of matching size.
    #
    # @return [Hash] { valid: Boolean, reason: String or nil }
    def blob_integrity_for(blob)
      return { valid: false, reason: "missing_blob" } unless blob
      return { valid: false, reason: "non_positive_byte_size" } if blob.byte_size.to_i <= 0

      service = blob.service
      # path_for may be private on the service, hence respond_to?(..., true) and send.
      if service.respond_to?(:path_for, true)
        path = service.send(:path_for, blob.key)
        return { valid: false, reason: "missing_file_on_disk" } unless path && File.exist?(path)

        file_size = File.size(path)
        return { valid: false, reason: "zero_byte_file" } if file_size <= 0
        return { valid: false, reason: "byte_size_mismatch" } if blob.byte_size.to_i.positive? && file_size != blob.byte_size.to_i
      end

      { valid: true, reason: nil }
    rescue StandardError
      { valid: false, reason: "integrity_check_error" }
    end

    # True when the first 4 KiB of the payload look like an HTML document.
    def html_payload?(body)
      sample = body.to_s.byteslice(0, 4096).to_s.downcase
      sample.include?("<html") || sample.start_with?("<!doctype html")
    end

    # Raises when the payload's leading bytes contradict the declared content type.
    # Blank and octet-stream types, and types not listed below, are not checked.
    def validate_known_signature!(body:, content_type:)
      type = content_type.to_s.downcase
      return if type.blank?
      return if type.include?("octet-stream")

      case
      when type.include?("jpeg")
        raise "invalid jpeg signature" unless body.start_with?("\xFF\xD8".b)
      when type.include?("png")
        raise "invalid png signature" unless body.start_with?("\x89PNG\r\n\x1A\n".b)
      when type.include?("gif")
        raise "invalid gif signature" unless body.start_with?("GIF87a".b) || body.start_with?("GIF89a".b)
      when type.include?("webp")
        raise "invalid webp signature" unless body.bytesize >= 12 && body.byteslice(0, 4) == "RIFF" && body.byteslice(8, 4) == "WEBP"
      when type.start_with?("video/")
        raise "invalid video signature" unless body.bytesize >= 12 && body.byteslice(4, 4) == "ftyp"
      end
    end

    # Points the post's media at +blob+. An existing attachment row is updated in
    # place instead of being replaced.
    def attach_blob_to_post!(post:, blob:)
      raise "missing blob for attach" unless blob

      if post.media.attached? && post.media.attachment.present?
        attachment = post.media.attachment
        attachment.update!(blob: blob) if attachment.blob_id != blob.id
        return
      end

      post.media.attach(blob)
    end

    # Maps a content type to a file extension, defaulting to "bin".
    def extension_for_content_type(content_type)
      return "jpg" if content_type.include?("jpeg")
      return "png" if content_type.include?("png")
      return "webp" if content_type.include?("webp")
      return "gif" if content_type.include?("gif")
      return "mp4" if content_type.include?("mp4")
      return "mov" if content_type.include?("quicktime")

      "bin"
    end

    # Attaches an already stored blob for the same shortcode, if one is available.
    #
    # @return [Boolean] true when a cached blob was attached; false otherwise,
    #   including when attaching raised (the error is logged)
    def attach_media_from_local_cache!(post:)
      blob = cached_media_blob_for(post: post)
      return false unless blob

      attach_blob_to_post!(post: post, blob: blob)
      post.update!(media_downloaded_at: Time.current)
      true
    rescue StandardError => e
      Rails.logger.warn("[DownloadInstagramPostMediaJob] local media cache attach failed post_id=#{post.id}: #{e.class}: #{e.message}")
      false
    end

    # Looks for an intact media blob on another InstagramPost, then on an
    # InstagramProfilePost, with the same shortcode.
    #
    # @return [ActiveStorage::Blob, nil]
    def cached_media_blob_for(post:)
      shortcode = post.shortcode.to_s.strip
      return nil if shortcode.blank?

      cached_feed_post = InstagramPost
        .joins(:media_attachment)
        .where(shortcode: shortcode)
        .where.not(id: post.id)
        .order(media_downloaded_at: :desc, id: :desc)
        .first
      if cached_feed_post&.media&.attached?
        blob = cached_feed_post.media.blob
        return blob if blob_integrity_for(blob)[:valid]
      end

      cached_profile_post = InstagramProfilePost
        .joins(:media_attachment)
        .where(shortcode: shortcode)
        .order(updated_at: :desc, id: :desc)
        .first
      if cached_profile_post&.media&.attached?
        blob = cached_profile_post.media.blob
        return blob if blob_integrity_for(blob)[:valid]
      end

      nil
    end
  end

app/jobs/download_instagram_profile_avatar_job.rb

0.0% lines covered

100.0% branches covered

167 relevant lines. 0 lines covered and 167 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "net/http"
  2. require "digest"
  3. require "cgi"
  4. require "uri"
  5. class DownloadInstagramProfileAvatarJob < ApplicationJob
  6. queue_as :avatars
  7. def perform(instagram_account_id:, instagram_profile_id:, broadcast: true, force: false, profile_action_log_id: nil)
  8. account = InstagramAccount.find(instagram_account_id)
  9. profile = account.instagram_profiles.find(instagram_profile_id)
  10. action_log = find_or_create_action_log(
  11. account: account,
  12. profile: profile,
  13. action: "sync_avatar",
  14. profile_action_log_id: profile_action_log_id
  15. )
  16. action_log.mark_running!(extra_metadata: { queue_name: queue_name, active_job_id: job_id, force: force })
  17. raw_url = profile.profile_pic_url.to_s
  18. url = Instagram::AvatarUrlNormalizer.normalize(raw_url)
  19. if url.blank?
  20. # Nothing to download; leave the attachment blank and allow UI default avatar fallback.
  21. profile.update!(
  22. profile_pic_url: (raw_url.present? ? nil : profile.profile_pic_url),
  23. avatar_url_fingerprint: nil,
  24. avatar_synced_at: Time.current
  25. )
  26. action_log.mark_succeeded!(
  27. extra_metadata: {
  28. skipped: true,
  29. reason: raw_url.present? ? "invalid_or_placeholder_avatar_url" : "avatar_url_blank",
  30. profile_pic_url_raw: raw_url.presence
  31. },
  32. log_text: raw_url.present? ? "Avatar URL invalid/placeholder; skipped download" : "Avatar URL blank; marked as synced with no attachment"
  33. )
  34. return
  35. end
  36. fp = url_fingerprint(url)
  37. # Skip if we already have the latest avatar attached.
  38. if profile.avatar.attached? && !force && profile.avatar_url_fingerprint.to_s == fp
  39. action_log.mark_succeeded!(log_text: "Avatar unchanged; skipped download", extra_metadata: { skipped: true })
  40. return
  41. end
  42. io, filename, content_type = fetch_url(url, user_agent: account.user_agent)
  43. attach_avatar!(
  44. profile: profile,
  45. io: io,
  46. filename: filename,
  47. content_type: content_type
  48. )
  49. avatar_changed = profile.avatar_url_fingerprint.present? && profile.avatar_url_fingerprint != fp
  50. profile.update!(avatar_url_fingerprint: fp, avatar_synced_at: Time.current)
  51. if avatar_changed
  52. event = profile.record_event!(
  53. kind: "avatar_changed",
  54. external_id: fp,
  55. occurred_at: nil,
  56. metadata: { profile_pic_url: url }
  57. )
  58. begin
  59. event.media.attach(profile.avatar.blob) if profile.avatar.attached?
  60. rescue StandardError
  61. nil
  62. end
  63. else
  64. profile.record_event!(
  65. kind: "avatar_synced",
  66. external_id: fp,
  67. occurred_at: nil,
  68. metadata: { profile_pic_url: url }
  69. )
  70. end
  71. if broadcast
  72. Turbo::StreamsChannel.broadcast_append_to(
  73. account,
  74. target: "notifications",
  75. partial: "shared/notification",
  76. locals: { kind: "notice", message: "Downloaded avatar for #{profile.username}." }
  77. )
  78. end
  79. action_log.mark_succeeded!(
  80. extra_metadata: { fingerprint: fp, avatar_changed: avatar_changed, profile_pic_url: url },
  81. log_text: "Avatar sync complete"
  82. )
  83. rescue StandardError => e
  84. if broadcast
  85. Turbo::StreamsChannel.broadcast_append_to(
  86. account,
  87. target: "notifications",
  88. partial: "shared/notification",
  89. locals: { kind: "alert", message: "Avatar download failed: #{e.message}" }
  90. )
  91. end
  92. action_log&.mark_failed!(error_message: e.message, extra_metadata: { active_job_id: job_id })
  93. raise
  94. end
  95. private
  96. def attach_avatar!(profile:, io:, filename:, content_type:)
  97. attachment = profile.avatar_attachment
  98. unless attachment.present?
  99. profile.avatar.attach(
  100. io: io,
  101. filename: filename,
  102. content_type: content_type
  103. )
  104. return
  105. end
  106. # Avoid destroying the attachment row because ActiveStorageIngestion keeps
  107. # a foreign-key reference to attachment ids for storage observability.
  108. new_blob = ActiveStorage::Blob.create_and_upload!(
  109. io: io,
  110. filename: filename,
  111. content_type: content_type
  112. )
  113. attachment.update!(blob: new_blob)
  114. end
  115. def find_or_create_action_log(account:, profile:, action:, profile_action_log_id:)
  116. log = profile.instagram_profile_action_logs.find_by(id: profile_action_log_id) if profile_action_log_id.present?
  117. return log if log
  118. profile.instagram_profile_action_logs.create!(
  119. instagram_account: account,
  120. action: action,
  121. status: "queued",
  122. trigger_source: "job",
  123. occurred_at: Time.current,
  124. active_job_id: job_id,
  125. queue_name: queue_name,
  126. metadata: { created_by: self.class.name }
  127. )
  128. end
  129. def url_fingerprint(url)
  130. uri = URI.parse(url)
  131. # Instagram CDN URLs often rotate query params; host+path is the stable signal for "same image".
  132. base = "#{uri.host}#{uri.path}"
  133. Digest::SHA256.hexdigest(base)
  134. rescue StandardError
  135. Digest::SHA256.hexdigest(url.to_s)
  136. end
  # Fetches +url+ over HTTP(S) with browser-like headers, following redirects
  # until the +redirects_left+ budget drops below zero.
  #
  # Returns [io, filename, content_type]: a StringIO over the body, a filename
  # derived from the URL path (default "avatar.jpg") and the media type from
  # the Content-Type header (default "image/jpeg").
  #
  # Raises RuntimeError for non-HTTP(S) URLs, an exhausted redirect budget,
  # an unusable redirect target, a non-success status or an empty body.
  def fetch_url(url, user_agent:, redirects_left: 4)
    raise "Too many redirects" if redirects_left.negative?

    uri = URI.parse(url)
    raise "Invalid URL" unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)

    connection = Net::HTTP.new(uri.host, uri.port)
    connection.use_ssl = (uri.scheme == "https")
    connection.open_timeout = 10
    connection.read_timeout = 20

    request = Net::HTTP::Get.new(uri.request_uri)
    {
      "User-Agent" => user_agent.presence || "Mozilla/5.0",
      "Accept" => "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
      "Referer" => "https://www.instagram.com/"
    }.each { |header, value| request[header] = value }

    response = connection.request(request)

    # Handle simple redirects (CDN often redirects).
    location = response["location"]
    if response.is_a?(Net::HTTPRedirection) && location.present?
      target = normalize_redirect_url(base_uri: uri, location: location)
      raise "Invalid redirect URL" if target.blank?

      return fetch_url(target, user_agent: user_agent, redirects_left: redirects_left - 1)
    end

    raise "HTTP #{response.code}" unless response.is_a?(Net::HTTPSuccess)

    payload = response.body
    raise "Empty response body" if payload.blank?

    filename = File.basename(uri.path.presence || "avatar.jpg")
    filename = "avatar.jpg" if filename.blank? || filename == "/"
    content_type = response["content-type"].to_s.split(";").first.presence || "image/jpeg"

    [StringIO.new(payload), filename, content_type]
  end
  # Resolves a Location header value against the URI that produced it.
  #
  # Returns the absolute URL as a String, or nil when the target is not
  # HTTP(S) or cannot be parsed.
  def normalize_redirect_url(base_uri:, location:)
    absolute = URI.join(base_uri.to_s, location.to_s).to_s
    resolved = URI.parse(absolute)

    case resolved
    when URI::HTTP, URI::HTTPS then resolved.to_s
    end
  rescue URI::InvalidURIError, ArgumentError
    nil
  end
  173. end

app/jobs/download_instagram_profile_post_media_job.rb

0.0% lines covered

100.0% branches covered

577 relevant lines. 0 lines covered and 577 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "net/http"
  2. require "digest"
  3. require "stringio"
  4. class DownloadInstagramProfilePostMediaJob < ApplicationJob
  5. queue_as :post_downloads
  6. MAX_IMAGE_BYTES = 6 * 1024 * 1024
  7. MAX_VIDEO_BYTES = 80 * 1024 * 1024
  8. MAX_PREVIEW_IMAGE_BYTES = 3 * 1024 * 1024
  9. PROFILE_POST_PREVIEW_ENQUEUE_TTL_SECONDS = 30.minutes
  10. retry_on Net::OpenTimeout, Net::ReadTimeout, wait: :polynomially_longer, attempts: 4
  11. retry_on Errno::ECONNRESET, Errno::ECONNREFUSED, wait: :polynomially_longer, attempts: 4
  12. retry_on Timeout::Error, wait: :polynomially_longer, attempts: 3
  # Downloads (or re-validates) the media for one profile post while holding a
  # row lock on the post, optionally queues the analysis job, and emits a
  # structured log entry describing the outcome.
  #
  # Any StandardError is recorded on the post's metadata (best effort), logged
  # as a failure event and re-raised.
  def perform(instagram_account_id:, instagram_profile_id:, instagram_profile_post_id:, trigger_analysis: true)
    account = InstagramAccount.find(instagram_account_id)
    profile = account.instagram_profiles.find(instagram_profile_id)
    post = profile.instagram_profile_posts.find(instagram_profile_post_id)

    analysis_requested = ActiveModel::Type::Boolean.new.cast(trigger_analysis)
    analysis_state = { queued: false, reason: "analysis_trigger_disabled" }
    download_state = nil

    post.with_lock do
      download_state = ensure_media_downloaded!(profile: profile, post: post)

      if analysis_requested
        media_ready = %w[downloaded already_downloaded].include?(download_state[:status].to_s)
        analysis_state =
          if media_ready
            enqueue_analysis_if_allowed!(account: account, profile: profile, post: post)
          else
            { queued: false, reason: "download_not_completed" }
          end
      end
    end

    success_payload = {
      active_job_id: job_id,
      instagram_account_id: account.id,
      instagram_profile_id: profile.id,
      instagram_profile_post_id: post.id,
      shortcode: post.shortcode,
      download_status: download_state[:status],
      download_source: download_state[:source],
      analysis_queued: analysis_state[:queued],
      analysis_reason: analysis_state[:reason],
      analysis_job_id: analysis_state[:job_id]
    }
    Ops::StructuredLogger.info(event: "profile_post_media_download.completed", payload: success_payload)
  rescue StandardError => e
    # `post` is nil when the failure happened before the post was loaded.
    mark_download_failed!(post: post, error: e) if post

    failure_payload = {
      active_job_id: job_id,
      instagram_account_id: account&.id || instagram_account_id,
      instagram_profile_id: profile&.id || instagram_profile_id,
      instagram_profile_post_id: post&.id || instagram_profile_post_id,
      error_class: e.class.name,
      error_message: e.message.to_s
    }
    Ops::StructuredLogger.error(event: "profile_post_media_download.failed", payload: failure_payload)
    raise
  end
  61. private
  # Makes sure +post+ has a usable media attachment and returns a
  # { status:, source: } hash describing how that was achieved.
  #
  # Order of attempts: skip (deleted / no URL), keep a valid existing
  # attachment, reuse a blob from another record, download from the remote URL.
  def ensure_media_downloaded!(profile:, post:)
    if post_deleted?(post)
      return mark_download_skipped!(profile: profile, post: post, reason: "deleted_from_source")
    end

    media_url = resolve_media_url(post)
    if media_url.blank?
      return mark_download_skipped!(profile: profile, post: post, reason: "missing_media_url")
    end

    if post.media.attached?
      integrity = blob_integrity_for(post.media.blob)
      if integrity[:valid]
        ensure_preview_image_for_video!(post: post, media_url: media_url)
        record_download_success!(profile: profile, post: post, source: "already_attached", media_url: media_url)
        return { status: "already_downloaded", source: "already_attached" }
      end

      # Existing attachment is unusable: flag it and fall through to re-acquire.
      mark_corrupt_media_detected!(post: post, reason: integrity[:reason])
    end

    if attach_media_from_local_cache!(post: post)
      ensure_preview_image_for_video!(post: post, media_url: media_url)
      record_download_success!(profile: profile, post: post, source: "local_cache", media_url: media_url)
      return { status: "downloaded", source: "local_cache" }
    end

    stream = nil
    begin
      stream, remote_type, remote_name = download_media(media_url)
      uploaded = ActiveStorage::Blob.create_and_upload!(
        io: stream,
        filename: remote_name,
        content_type: remote_type,
        identify: false
      )
      attach_blob_to_post!(post: post, blob: uploaded)

      # Hand the in-memory bytes to the preview step when the IO exposes them.
      raw_bytes = stream.respond_to?(:string) ? stream.string.to_s : nil
      ensure_preview_image_for_video!(
        post: post,
        media_url: media_url,
        video_bytes: raw_bytes,
        content_type: remote_type
      )

      refreshed_metadata = merged_metadata(post: post).merge(
        "download_status" => "downloaded",
        "download_source" => "remote",
        "downloaded_at" => Time.current.utc.iso8601(3),
        "download_error" => nil
      )
      post.update!(
        media_url_fingerprint: Digest::SHA256.hexdigest(media_url),
        metadata: refreshed_metadata
      )
      record_download_success!(profile: profile, post: post, source: "remote", media_url: media_url)

      { status: "downloaded", source: "remote" }
    ensure
      stream.close if stream.respond_to?(:close)
    end
  end
  # Queues AnalyzeInstagramProfilePostJob for +post+ unless the scan policy
  # blocks it, the post is already analyzed, or the same media fingerprint is
  # already pending.
  #
  # Always returns a hash with :queued and :reason; errors are logged and
  # reported in the hash rather than raised.
  def enqueue_analysis_if_allowed!(account:, profile:, post:)
    decision = Instagram::ProfileScanPolicy.new(profile: profile).decision
    if decision[:skip_post_analysis]
      Instagram::ProfileScanPolicy.mark_post_analysis_skipped!(post: post, decision: decision)
      return {
        queued: false,
        reason: "policy_blocked",
        skip_reason_code: decision[:reason_code].to_s
      }
    end

    already_analyzed = post.ai_status.to_s == "analyzed" && post.analyzed_at.present?
    return { queued: false, reason: "already_analyzed" } if already_analyzed

    fingerprint = analysis_enqueue_fingerprint(post)
    metadata = merged_metadata(post: post)
    pending_for_same_media =
      post.ai_status.to_s == "pending" &&
      metadata["analysis_enqueued_fingerprint"].to_s == fingerprint
    return { queued: false, reason: "already_queued_for_current_media" } if pending_for_same_media

    task_flags = {
      generate_comments: false,
      enforce_comment_evidence_policy: false,
      retry_on_incomplete_profile: false
    }
    job = AnalyzeInstagramProfilePostJob.perform_later(
      instagram_account_id: account.id,
      instagram_profile_id: profile.id,
      instagram_profile_post_id: post.id,
      task_flags: task_flags
    )

    enqueue_stamp = {
      "analysis_enqueued_at" => Time.current.utc.iso8601(3),
      "analysis_enqueued_by" => self.class.name,
      "analysis_enqueued_fingerprint" => fingerprint,
      "analysis_job_id" => job.job_id
    }
    post.update!(ai_status: "pending", analyzed_at: nil, metadata: metadata.merge(enqueue_stamp))

    profile.record_event!(
      kind: "profile_post_analysis_queued",
      external_id: "profile_post_analysis_queued:#{post.id}:#{fingerprint}",
      occurred_at: Time.current,
      metadata: {
        source: self.class.name,
        instagram_profile_post_id: post.id,
        shortcode: post.shortcode,
        analysis_job_id: job.job_id
      }
    )

    { queued: true, reason: "queued", job_id: job.job_id }
  rescue StandardError => e
    Rails.logger.warn(
      "[DownloadInstagramProfilePostMediaJob] analysis queue failed for post_id=#{post.id}: #{e.class}: #{e.message}"
    )
    {
      queued: false,
      reason: "analysis_enqueue_failed",
      error_class: e.class.name,
      error_message: e.message.to_s
    }
  end
  # Stamps the post metadata as downloaded (clearing any previous error) and
  # records a "profile_post_media_downloaded" event on the profile.
  def record_download_success!(profile:, post:, source:, media_url:)
    recorded_at = Time.current
    source_name = source.to_s

    success_metadata = merged_metadata(post: post).merge(
      "download_status" => "downloaded",
      "download_source" => source_name,
      "downloaded_at" => recorded_at.utc.iso8601(3),
      "download_error" => nil
    )
    post.update!(metadata: success_metadata)

    event_details = {
      source: self.class.name,
      instagram_profile_post_id: post.id,
      shortcode: post.shortcode,
      media_url: media_url,
      download_source: source_name,
      media_attached: post.media.attached?
    }
    profile.record_event!(
      kind: "profile_post_media_downloaded",
      external_id: "profile_post_media_downloaded:#{post.id}:#{analysis_enqueue_fingerprint(post)}",
      occurred_at: recorded_at,
      metadata: event_details
    )
  end
  # Marks the post's download as skipped for +reason+, records a matching
  # profile event, and returns the { status:, source: } result for the caller.
  def mark_download_skipped!(profile:, post:, reason:)
    reason_text = reason.to_s

    skipped_metadata = merged_metadata(post: post).merge(
      "download_status" => "skipped",
      "download_skip_reason" => reason_text,
      "download_error" => nil,
      "downloaded_at" => nil
    )
    post.update!(metadata: skipped_metadata)

    profile.record_event!(
      kind: "profile_post_media_download_skipped",
      external_id: "profile_post_media_download_skipped:#{post.id}:#{reason}",
      occurred_at: Time.current,
      metadata: {
        source: self.class.name,
        instagram_profile_post_id: post.id,
        shortcode: post.shortcode,
        reason: reason_text
      }
    )

    { status: "skipped", source: reason_text }
  end
  # Best-effort bookkeeping: writes the failure onto the post's metadata.
  # Errors raised while doing so are swallowed (nil is returned) so they never
  # mask the error being reported.
  def mark_download_failed!(post:, error:)
    failure_metadata = merged_metadata(post: post).merge(
      "download_status" => "failed",
      "download_error" => "#{error.class}: #{error.message}",
      "downloaded_at" => nil
    )
    post.update!(metadata: failure_metadata)
  rescue StandardError
    nil
  end
  # Best-effort bookkeeping: flags the post's current attachment as corrupt
  # together with the failed integrity +reason+. Errors are swallowed and nil
  # is returned.
  def mark_corrupt_media_detected!(post:, reason:)
    corrupt_metadata = merged_metadata(post: post).merge(
      "download_status" => "corrupt_detected",
      "download_error" => "integrity_check_failed: #{reason}",
      "download_corrupt_detected_at" => Time.current.utc.iso8601(3)
    )
    post.update!(metadata: corrupt_metadata)
  rescue StandardError
    nil
  end
  # Casts the "deleted_from_source" metadata flag to a boolean.
  def post_deleted?(post)
    flag = merged_metadata(post: post)["deleted_from_source"]
    ActiveModel::Type::Boolean.new.cast(flag)
  end
  # Picks the URL to download from: the post's own source URL when present,
  # otherwise the video URL and then the image URL stored in metadata.
  # Returns nil when none is available.
  def resolve_media_url(post)
    direct = post.source_media_url.to_s.strip
    return direct if direct.present?

    stored = merged_metadata(post: post)
    %w[media_url_video media_url_image].each do |key|
      candidate = stored[key].to_s.strip.presence
      return candidate if candidate
    end

    nil
  end
  # Returns a deep copy of the post's metadata hash so callers can merge into
  # it freely; a non-Hash value yields an empty hash.
  def merged_metadata(post:)
    current = post.metadata
    return {} unless current.is_a?(Hash)

    current.deep_dup
  end
  # Identifier for "the media currently associated with this post", used to
  # deduplicate enqueues and event ids. Preference order: attached blob
  # checksum, stored URL fingerprint, hash of the resolved URL, post id.
  def analysis_enqueue_fingerprint(post)
    if post.media.attached?
      checksum = post.media.blob&.checksum.to_s
      return "blob:#{checksum}" if checksum.present?
    end

    stored_fingerprint = post.media_url_fingerprint
    return "fp:#{stored_fingerprint}" if stored_fingerprint.to_s.present?

    source = resolve_media_url(post)
    return "url:#{Digest::SHA256.hexdigest(source)}" if source.present?

    "post:#{post.id}"
  end
  # Attaches a blob already stored for another record with the same media,
  # updates the post's URL fingerprint, and tries to reuse a preview as well.
  #
  # Returns true when a cached blob was attached; false when none was found or
  # any step raised (the error is logged).
  def attach_media_from_local_cache!(post:)
    cached_blob = cached_media_blob_for(post: post)
    return false unless cached_blob

    source_url = resolve_media_url(post)
    fingerprint =
      if source_url.present?
        Digest::SHA256.hexdigest(source_url)
      else
        post.media_url_fingerprint.to_s.presence
      end

    attach_blob_to_post!(post: post, blob: cached_blob)
    post.update!(media_url_fingerprint: fingerprint)
    attach_preview_from_local_cache!(post: post)
    true
  rescue StandardError => e
    Rails.logger.warn("[DownloadInstagramProfilePostMediaJob] local media cache attach failed post_id=#{post.id}: #{e.class}: #{e.message}")
    false
  end
  # Looks for an intact media blob already stored for the same media on another
  # record. Search order: most recent other profile post with the same
  # metadata media_id, other profile posts with the same shortcode, then feed
  # posts with the same shortcode. Returns the blob or nil.
  def cached_media_blob_for(post:)
    media_id = merged_metadata(post: post)["media_id"].to_s.strip
    shortcode = post.shortcode.to_s.strip

    if media_id.present?
      # Only the most recently updated match is considered here.
      donor = InstagramProfilePost
        .joins(:media_attachment)
        .where.not(id: post.id)
        .where("metadata ->> 'media_id' = ?", media_id)
        .order(updated_at: :desc, id: :desc)
        .first
      if donor&.media&.attached? && blob_integrity_for(donor.media.blob)[:valid]
        return donor.media.blob
      end
    end

    return nil unless shortcode.present?

    profile_posts = InstagramProfilePost
      .joins(:media_attachment)
      .where.not(id: post.id)
      .where(shortcode: shortcode)
      .order(updated_at: :desc, id: :desc)
    profile_posts.each do |record|
      next unless record&.media&.attached?

      found = record.media.blob
      return found if blob_integrity_for(found)[:valid]
    end

    feed_posts = InstagramPost
      .joins(:media_attachment)
      .where(shortcode: shortcode)
      .order(media_downloaded_at: :desc, id: :desc)
    feed_posts.each do |record|
      next unless record&.media&.attached?

      found = record.media.blob
      return found if blob_integrity_for(found)[:valid]
    end

    nil
  end
  # Reuses a preview image already attached to another profile post for the
  # same media (matched by metadata media_id first, then by shortcode).
  #
  # Returns true when a preview was attached; false when the post already has
  # one, no donor exists, or any step raised.
  def attach_preview_from_local_cache!(post:)
    return false if post.preview_image.attached?

    media_id = merged_metadata(post: post)["media_id"].to_s.strip
    shortcode = post.shortcode.to_s.strip

    # Attaches the donor's preview blob when it has one; reports success.
    adopt_preview = lambda do |donor|
      next false unless donor&.preview_image&.attached?

      attach_preview_blob_to_post!(post: post, blob: donor.preview_image.blob)
      true
    end

    if media_id.present?
      donor_by_media_id = InstagramProfilePost
        .joins(:preview_image_attachment)
        .where.not(id: post.id)
        .where("metadata ->> 'media_id' = ?", media_id)
        .order(updated_at: :desc, id: :desc)
        .first
      return true if adopt_preview.call(donor_by_media_id)
    end

    return false if shortcode.blank?

    donor_by_shortcode = InstagramProfilePost
      .joins(:preview_image_attachment)
      .where.not(id: post.id)
      .where(shortcode: shortcode)
      .order(updated_at: :desc, id: :desc)
      .first
    adopt_preview.call(donor_by_shortcode)
  rescue StandardError
    false
  end
  # Ensures a video post has a preview image attached.
  #
  # Sources are tried in order: a preview reused from another record, a poster
  # image downloaded from a known URL, then the first frame extracted from the
  # video bytes. When none works a background generation job is requested.
  #
  # Returns true when a preview is (already) attached, false otherwise
  # (including non-video or unattached media). Never raises: errors are logged
  # and turned into a background generation request.
  def ensure_preview_image_for_video!(post:, media_url:, video_bytes: nil, content_type: nil)
    return false unless post.media.attached?
    return false unless post.media.blob&.content_type.to_s.start_with?("video/")
    return true if post.preview_image.attached?

    metadata = merged_metadata(post: post)

    if attach_preview_from_local_cache!(post: post)
      stamp_preview_metadata!(post: post, source: "local_cache")
      return true
    end

    poster_url = preferred_preview_image_url(post: post, media_url: media_url, metadata: metadata)
    poster = poster_url.present? ? download_preview_image(poster_url) : nil
    if poster
      attach_preview_image_bytes!(
        post: post,
        image_bytes: poster[:bytes],
        content_type: poster[:content_type],
        filename: poster[:filename]
      )
      stamp_preview_metadata!(post: post, source: "remote_image_url")
      return true
    end

    # Fall back to the stored blob when the caller did not pass the bytes.
    raw_video = video_bytes.to_s.b
    if raw_video.blank? && post.media.attached? && post.media.blob.byte_size.to_i <= MAX_VIDEO_BYTES
      raw_video = post.media.blob.download.to_s.b
    end

    if raw_video.blank?
      enqueue_background_preview_generation!(post: post, reason: "video_bytes_missing")
      return false
    end

    frame = VideoThumbnailService.new.extract_first_frame(
      video_bytes: raw_video,
      reference_id: "profile_post_#{post.id}",
      content_type: content_type || post.media.blob.content_type
    )
    unless frame[:ok]
      failure_reason = frame.dig(:metadata, :reason).to_s.presence || "ffmpeg_extract_failed"
      enqueue_background_preview_generation!(post: post, reason: failure_reason)
      return false
    end

    attach_preview_image_bytes!(
      post: post,
      image_bytes: frame[:image_bytes],
      content_type: frame[:content_type],
      filename: frame[:filename]
    )
    stamp_preview_metadata!(post: post, source: "ffmpeg_first_frame")
    true
  rescue StandardError => e
    Rails.logger.warn("[DownloadInstagramProfilePostMediaJob] preview attach failed post_id=#{post.id}: #{e.class}: #{e.message}")
    enqueue_background_preview_generation!(post: post, reason: "#{e.class}: #{e.message}")
    false
  end
  # Chooses the URL most likely to point at a still image for the post.
  #
  # Metadata keys are tried first, in the order listed below, followed by the
  # post's own source URL and the resolved +media_url+. The last two are only
  # offered when the URL itself does not end in a video extension
  # (mp4/mov/webm, optionally followed by a query string).
  #
  # Each of those two URLs is screened on its own merits. Previously
  # +media_url+ was screened using the extension of +source_media_url+, so a
  # video URL resolved from metadata (blank source URL) was offered as a
  # preview image and downloaded in full before being rejected.
  #
  # Returns the first non-blank candidate as a stripped String, or nil.
  def preferred_preview_image_url(post:, media_url:, metadata:)
    looks_like_video = ->(url) { url.downcase.match?(/\.(mp4|mov|webm)(\?|$)/) }

    candidates = [
      metadata["preview_image_url"],
      metadata["poster_url"],
      metadata["image_url"],
      metadata["media_url_image"],
      metadata["media_url"]
    ]

    source_media = post.source_media_url.to_s.strip
    resolved_media = media_url.to_s.strip
    candidates << source_media unless looks_like_video.call(source_media)
    candidates << resolved_media unless looks_like_video.call(resolved_media)

    candidates.compact.map { |value| value.to_s.strip }.find(&:present?)
  end
  # Downloads a candidate preview image, following redirects while the
  # +redirects_left+ budget is positive.
  #
  # Returns { bytes:, content_type:, filename: } for a non-empty image response
  # no larger than MAX_PREVIEW_IMAGE_BYTES that is not HTML and passes the
  # signature check. Returns nil in every other case, including any error.
  def download_preview_image(url, redirects_left: 3)
    uri = URI.parse(url)
    return nil unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)

    client = Net::HTTP.new(uri.host, uri.port)
    client.use_ssl = (uri.scheme == "https")
    client.open_timeout = 8
    client.read_timeout = 20

    request = Net::HTTP::Get.new(uri.request_uri)
    request["Accept"] = "image/*,*/*;q=0.8"
    request["User-Agent"] = "Mozilla/5.0"
    request["Referer"] = Instagram::Client::INSTAGRAM_BASE_URL
    response = client.request(request)

    if response.is_a?(Net::HTTPRedirection) && response["location"].present?
      hops_left = redirects_left.to_i
      return nil if hops_left <= 0

      target = normalize_redirect_url(base_uri: uri, location: response["location"])
      return nil if target.blank?

      return download_preview_image(target, redirects_left: hops_left - 1)
    end

    return nil unless response.is_a?(Net::HTTPSuccess)

    payload = response.body.to_s.b
    return nil if payload.bytesize <= 0 || payload.bytesize > MAX_PREVIEW_IMAGE_BYTES
    return nil if html_payload?(payload)

    media_type = response["content-type"].to_s.split(";").first.to_s
    return nil unless media_type.start_with?("image/")

    validate_known_signature!(body: payload, content_type: media_type)
    extension = extension_for_content_type(media_type)
    url_digest = Digest::SHA256.hexdigest(url)[0, 12]

    {
      bytes: payload,
      content_type: media_type,
      filename: "profile_post_preview_#{url_digest}.#{extension}"
    }
  rescue StandardError
    nil
  end
  # Uploads +image_bytes+ as a new blob (content type defaults to
  # "image/jpeg") and attaches it as the post's preview image.
  def attach_preview_image_bytes!(post:, image_bytes:, content_type:, filename:)
    preview_type = content_type.to_s.presence || "image/jpeg"
    preview_blob = ActiveStorage::Blob.create_and_upload!(
      io: StringIO.new(image_bytes),
      filename: filename,
      content_type: preview_type,
      identify: false
    )
    attach_preview_blob_to_post!(post: post, blob: preview_blob)
  end
  # Points the post's preview image at +blob+. An existing attachment row is
  # updated in place (only when the blob differs) instead of being replaced;
  # otherwise a new attachment is created. No-op when +blob+ is nil.
  def attach_preview_blob_to_post!(post:, blob:)
    return unless blob

    existing = post.preview_image.attached? ? post.preview_image.attachment : nil
    if existing.present?
      existing.update!(blob: blob) unless existing.blob_id == blob.id
      return
    end

    post.preview_image.attach(blob)
  end
  # Best-effort bookkeeping: records on the post's metadata that a preview was
  # attached and where it came from. Errors are swallowed and nil is returned.
  def stamp_preview_metadata!(post:, source:)
    stamped_metadata = merged_metadata(post: post).merge(
      "preview_image_status" => "attached",
      "preview_image_source" => source.to_s,
      "preview_image_attached_at" => Time.current.utc.iso8601(3)
    )
    post.update!(metadata: stamped_metadata)
  rescue StandardError
    nil
  end
  # Requests GenerateProfilePostPreviewImageJob for a video post that still
  # lacks a preview. The enqueue runs inside a cache fetch keyed by post id, so
  # it is repeated only once the cache entry is gone
  # (PROFILE_POST_PREVIEW_ENQUEUE_TTL_SECONDS). Failures are logged, not raised.
  def enqueue_background_preview_generation!(post:, reason:)
    return if post.preview_image.attached? || !post.media.attached?
    return unless post.media.blob&.content_type.to_s.start_with?("video/")

    throttle_key = "profile_post:preview_enqueue:#{post.id}"
    Rails.cache.fetch(throttle_key, expires_in: PROFILE_POST_PREVIEW_ENQUEUE_TTL_SECONDS) do
      GenerateProfilePostPreviewImageJob.perform_later(instagram_profile_post_id: post.id)
      true
    end
  rescue StandardError => e
    Rails.logger.warn(
      "[DownloadInstagramProfilePostMediaJob] preview enqueue failed post_id=#{post.id} " \
      "reason=#{reason}: #{e.class}: #{e.message}"
    )
    nil
  end
  # Downloads the media at +url+, following redirects while the
  # +redirects_left+ budget is positive.
  #
  # Returns [io, content_type, filename] where io is a binary StringIO over
  # the body. Raises RuntimeError for non-HTTP(S) URLs, redirect problems,
  # non-success statuses, empty or oversized bodies (MAX_VIDEO_BYTES for
  # video/*, MAX_IMAGE_BYTES otherwise), HTML payloads and signature
  # mismatches.
  def download_media(url, redirects_left: 4)
    uri = URI.parse(url)
    raise "invalid media URL" unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)

    client = Net::HTTP.new(uri.host, uri.port)
    client.use_ssl = (uri.scheme == "https")
    client.open_timeout = 10
    client.read_timeout = 30

    request = Net::HTTP::Get.new(uri.request_uri)
    request["Accept"] = "*/*"
    request["User-Agent"] = "Mozilla/5.0"
    request["Referer"] = Instagram::Client::INSTAGRAM_BASE_URL
    response = client.request(request)

    if response.is_a?(Net::HTTPRedirection) && response["location"].present?
      hops_left = redirects_left.to_i
      raise "too many redirects" if hops_left <= 0

      target = normalize_redirect_url(base_uri: uri, location: response["location"])
      raise "invalid redirect URL" if target.blank?

      return download_media(target, redirects_left: hops_left - 1)
    end

    raise "media download failed: HTTP #{response.code}" unless response.is_a?(Net::HTTPSuccess)

    payload = response.body.to_s
    media_type = response["content-type"].to_s.split(";").first.presence || "application/octet-stream"
    max_bytes = media_type.start_with?("video/") ? MAX_VIDEO_BYTES : MAX_IMAGE_BYTES

    raise "empty media payload" if payload.bytesize <= 0
    raise "media too large" if payload.bytesize > max_bytes
    raise "unexpected html payload" if html_payload?(payload)

    validate_known_signature!(body: payload, content_type: media_type)
    extension = extension_for_content_type(media_type)

    stream = StringIO.new(payload)
    stream.set_encoding(Encoding::BINARY) if stream.respond_to?(:set_encoding)

    [stream, media_type, "profile_post_#{Digest::SHA256.hexdigest(url)[0, 12]}.#{extension}"]
  end
  # Resolves a redirect Location against the URI that issued it and returns
  # the absolute URL String, or nil when the result is not HTTP(S) or the
  # location cannot be parsed.
  def normalize_redirect_url(base_uri:, location:)
    absolute = URI.join(base_uri.to_s, location.to_s).to_s
    resolved = URI.parse(absolute)

    case resolved
    when URI::HTTP, URI::HTTPS then resolved.to_s
    end
  rescue URI::InvalidURIError, ArgumentError
    nil
  end
  # Maps a media type to the file extension used for stored blobs.
  #
  # Matching is case-insensitive (media types are case-insensitive, and the
  # signature validation in this class already downcases the same header) and
  # nil-safe. The first fragment found in the media type wins; anything
  # unrecognised maps to "bin".
  def extension_for_content_type(content_type)
    type = content_type.to_s.downcase

    # Insertion order is the match order.
    extensions = {
      "jpeg" => "jpg",
      "jpg" => "jpg",
      "png" => "png",
      "webp" => "webp",
      "gif" => "gif",
      "mp4" => "mp4",
      "quicktime" => "mov",
      "webm" => "webm"
    }
    extensions.each { |fragment, extension| return extension if type.include?(fragment) }

    "bin"
  end
  # Sanity-checks a stored blob and returns { valid:, reason: }.
  #
  # The blob must exist and report a positive byte size. When its storage
  # service exposes +path_for+ (checked including private methods), the file
  # must also exist on disk, be non-empty and match the recorded size. Any
  # error during the check is reported as invalid.
  def blob_integrity_for(blob)
    return { valid: false, reason: "missing_blob" } unless blob

    recorded_size = blob.byte_size.to_i
    return { valid: false, reason: "non_positive_byte_size" } if recorded_size <= 0

    storage = blob.service
    if storage.respond_to?(:path_for, true)
      disk_path = storage.send(:path_for, blob.key)
      return { valid: false, reason: "missing_file_on_disk" } unless disk_path && File.exist?(disk_path)

      actual_size = File.size(disk_path)
      return { valid: false, reason: "zero_byte_file" } if actual_size <= 0
      # recorded_size is known to be positive here, so a plain comparison suffices.
      return { valid: false, reason: "byte_size_mismatch" } if actual_size != recorded_size
    end

    { valid: true, reason: nil }
  rescue StandardError => e
    { valid: false, reason: "integrity_check_error: #{e.class}" }
  end
  # Points the post's media at +blob+. An existing attachment row is updated in
  # place (only when the blob differs) instead of being replaced; otherwise a
  # new attachment is created. Raises RuntimeError when +blob+ is nil.
  def attach_blob_to_post!(post:, blob:)
    raise "missing blob for attach" unless blob

    existing = post.media.attached? ? post.media.attachment : nil
    if existing.present?
      existing.update!(blob: blob) unless existing.blob_id == blob.id
      return
    end

    post.media.attach(blob)
  end
  # Heuristic: does the first 4 KiB of +body+ look like an HTML document
  # (for example an error or login page served in place of media)?
  #
  # The sample is inspected as raw bytes. Slicing a UTF-8 tagged string by
  # bytes can cut a multibyte character in half, and case-folding such a
  # string raises ArgumentError; on a binary copy only ASCII letters are
  # folded and no encoding validation takes place.
  def html_payload?(body)
    sample = body.to_s.byteslice(0, 4096).to_s.b.downcase
    sample.include?("<html") || sample.start_with?("<!doctype html")
  end
  # Verifies that +body+ starts with the magic bytes expected for
  # +content_type+. Raises RuntimeError ("invalid <kind> signature") on a
  # mismatch and returns nil otherwise.
  #
  # Blank and octet-stream types are not checked, and neither are types with
  # no rule below. The body is compared as raw bytes so a payload tagged with
  # a non-binary encoding cannot raise Encoding::CompatibilityError. WebM and
  # Matroska videos are recognised by their EBML header; every other video/*
  # type must carry an ISO base media "ftyp" box at byte offset 4.
  def validate_known_signature!(body:, content_type:)
    type = content_type.to_s.downcase
    return if type.strip.empty?
    return if type.include?("octet-stream")

    bytes = body.to_s.b

    if type.include?("jpeg")
      raise "invalid jpeg signature" unless bytes.start_with?("\xFF\xD8".b)
    elsif type.include?("png")
      raise "invalid png signature" unless bytes.start_with?("\x89PNG\r\n\x1A\n".b)
    elsif type.include?("gif")
      raise "invalid gif signature" unless bytes.start_with?("GIF87a".b, "GIF89a".b)
    elsif type.include?("webp")
      riff_webp = bytes.bytesize >= 12 && bytes.byteslice(0, 4) == "RIFF" && bytes.byteslice(8, 4) == "WEBP"
      raise "invalid webp signature" unless riff_webp
    elsif type.include?("webm") || type.include?("matroska")
      raise "invalid webm signature" unless bytes.start_with?("\x1A\x45\xDF\xA3".b)
    elsif type.start_with?("video/")
      raise "invalid video signature" unless bytes.bytesize >= 12 && bytes.byteslice(4, 4) == "ftyp"
    end

    nil
  end
  577. end

app/jobs/download_missing_avatars_job.rb

0.0% lines covered

100.0% branches covered

36 relevant lines. 0 lines covered and 36 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Downloads avatars for an account's profiles that have a profile picture URL
  # but no avatar attachment yet, then broadcasts a summary notification to the
  # account's Turbo stream.
  class DownloadMissingAvatarsJob < ApplicationJob
    queue_as :avatars

    # instagram_account_id - id of the InstagramAccount to process.
    # limit                - maximum number of profiles to attempt (clamped to 1..2000).
    #
    # Failures of individual profiles are counted, not raised. Any other error
    # is broadcast as an alert (when the account could be loaded) and re-raised.
    def perform(instagram_account_id:, limit: 250)
      account = InstagramAccount.find(instagram_account_id)
      limit = limit.to_i.clamp(1, 2_000)

      profiles = account.instagram_profiles
        .where.not(profile_pic_url: [nil, ""])
        .left_joins(:avatar_attachment)
        .where(active_storage_attachments: { id: nil })
        .limit(limit)

      downloaded = 0
      failed = 0
      profiles.each do |profile|
        # NOTE(review): perform_now may not re-raise errors handled by the
        # avatar job's own retry/discard handlers, which would count them as
        # downloaded. Confirm against DownloadInstagramProfileAvatarJob.
        DownloadInstagramProfileAvatarJob.perform_now(
          instagram_account_id: account.id,
          instagram_profile_id: profile.id,
          broadcast: false
        )
        downloaded += 1
      rescue StandardError
        failed += 1
      end

      Turbo::StreamsChannel.broadcast_append_to(
        account,
        target: "notifications",
        partial: "shared/notification",
        locals: { kind: "notice", message: "Avatar sync complete: downloaded #{downloaded}, failed #{failed}." }
      )
    rescue StandardError => e
      # `account` is nil when the lookup itself failed; there is no stream to
      # notify in that case, and a secondary error here would mask the original.
      if account
        Turbo::StreamsChannel.broadcast_append_to(
          account,
          target: "notifications",
          partial: "shared/notification",
          locals: { kind: "alert", message: "Avatar sync failed: #{e.message}" }
        )
      end
      raise
    end
  end

app/jobs/enqueue_avatar_sync_for_all_accounts_job.rb

0.0% lines covered

100.0% branches covered

45 relevant lines. 0 lines covered and 45 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Scheduler entry point that walks all accounts in batches and enqueues a
  # DownloadMissingAvatarsJob for every account that has cookies. When more
  # accounts remain, a continuation of this job is scheduled with the next
  # cursor.
  class EnqueueAvatarSyncForAllAccountsJob < ApplicationJob
    include ScheduledAccountBatching

    queue_as :avatars

    DEFAULT_ACCOUNT_BATCH_SIZE = ENV.fetch("AVATAR_SYNC_ACCOUNT_BATCH_SIZE", "30").to_i.clamp(5, 160)
    CONTINUATION_WAIT_SECONDS = ENV.fetch("AVATAR_SYNC_CONTINUATION_WAIT_SECONDS", "2").to_i.clamp(1, 90)

    # Accepts scheduler params either as a positional hash or as keywords
    # (limit, batch_size, cursor_id). Returns a summary hash with the number of
    # accounts enqueued, the number scanned and the continuation job id, if any.
    def perform(opts = nil, **kwargs)
      params = normalize_scheduler_params(
        opts,
        kwargs,
        limit: 500,
        batch_size: DEFAULT_ACCOUNT_BATCH_SIZE,
        cursor_id: nil
      )
      limit = params[:limit].to_i.clamp(1, 2000)

      batch = load_account_batch(
        scope: InstagramAccount.all,
        cursor_id: params[:cursor_id],
        batch_size: params[:batch_size]
      )

      enqueued = 0
      batch[:accounts].each do |account|
        next if account.cookies.blank?

        DownloadMissingAvatarsJob.perform_later(instagram_account_id: account.id, limit: limit)
        enqueued += 1
      rescue StandardError => e
        # One account failing to enqueue must not stop the batch, but it should
        # be visible; the other scheduler jobs log the same way.
        Ops::StructuredLogger.warn(
          event: "avatar_sync.enqueue_failed",
          payload: {
            account_id: account.id,
            error_class: e.class.name,
            error_message: e.message
          }
        )
      end

      continuation_job = nil
      if batch[:has_more]
        continuation_job = schedule_account_batch_continuation!(
          wait_seconds: CONTINUATION_WAIT_SECONDS,
          payload: {
            limit: limit,
            batch_size: batch[:batch_size],
            cursor_id: batch[:next_cursor_id]
          }
        )
      end

      {
        accounts_enqueued: enqueued,
        scanned_accounts: batch[:accounts].length,
        continuation_job_id: continuation_job&.job_id
      }
    end
  end

app/jobs/enqueue_continuous_account_processing_job.rb

0.0% lines covered

100.0% branches covered

79 relevant lines. 0 lines covered and 79 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Scheduler entry point that enqueues ProcessInstagramAccountContinuouslyJob
  # for accounts with continuous processing enabled, in cursor-based batches,
  # scanning at most `limit` accounts across a chain of continuation runs.
  class EnqueueContinuousAccountProcessingJob < ApplicationJob
    include ScheduledAccountBatching

    queue_as :sync

    DEFAULT_ACCOUNT_BATCH_SIZE = ENV.fetch("CONTINUOUS_PROCESSING_ENQUEUE_BATCH_SIZE", "25").to_i.clamp(5, 120)
    CONTINUATION_WAIT_SECONDS = ENV.fetch("CONTINUOUS_PROCESSING_ENQUEUE_CONTINUATION_WAIT_SECONDS", "2").to_i.clamp(1, 60)

    # Accepts scheduler params as a positional hash or keywords (limit,
    # batch_size, cursor_id, remaining). Accounts without cookies, or whose
    # retry-after time is still in the future, are scanned but not enqueued.
    # Returns a summary hash of the run.
    def perform(opts = nil, **kwargs)
      params = normalize_scheduler_params(
        opts,
        kwargs,
        limit: 100,
        batch_size: DEFAULT_ACCOUNT_BATCH_SIZE,
        cursor_id: nil,
        remaining: nil
      )

      cap = params[:limit].to_i.clamp(1, 500)
      # `remaining` is only supplied by continuation runs; the first run starts at the cap.
      budget = params[:remaining].present? ? params[:remaining].to_i : cap
      remaining = budget.clamp(0, cap)
      return { enqueued: 0, limit: cap, remaining: 0 } if remaining <= 0

      requested_batch_size = params[:batch_size].to_i.clamp(1, 120)
      batch = load_account_batch(
        scope: InstagramAccount.where(continuous_processing_enabled: true),
        cursor_id: params[:cursor_id],
        batch_size: [ requested_batch_size, remaining ].min
      )

      enqueued = 0
      now = Time.current
      batch[:accounts].each do |account|
        next if account.cookies.blank?

        retry_after = account.continuous_processing_retry_after_at
        next if retry_after.present? && retry_after > now

        ProcessInstagramAccountContinuouslyJob.perform_later(
          instagram_account_id: account.id,
          trigger_source: "scheduler"
        )
        enqueued += 1
      rescue StandardError => e
        Ops::StructuredLogger.warn(
          event: "continuous_processing.enqueue_failed",
          payload: {
            account_id: account.id,
            error_class: e.class.name,
            error_message: e.message
          }
        )
      end

      # The budget counts scanned accounts, not only the ones enqueued.
      scanned = batch[:accounts].length
      remaining_after_batch = [ 0, remaining - scanned ].max

      continuation_job =
        if batch[:has_more] && remaining_after_batch.positive?
          schedule_account_batch_continuation!(
            wait_seconds: CONTINUATION_WAIT_SECONDS,
            payload: {
              limit: cap,
              batch_size: batch[:batch_size],
              cursor_id: batch[:next_cursor_id],
              remaining: remaining_after_batch
            }
          )
        end

      Ops::StructuredLogger.info(
        event: "continuous_processing.batch_enqueued",
        payload: {
          limit: cap,
          batch_size: batch[:batch_size],
          scanned_accounts: scanned,
          enqueued_count: enqueued,
          remaining_after_batch: remaining_after_batch,
          continuation_enqueued: continuation_job.present?,
          continuation_job_id: continuation_job&.job_id
        }
      )

      {
        enqueued: enqueued,
        limit: cap,
        batch_size: batch[:batch_size],
        scanned_accounts: scanned,
        remaining_after_batch: remaining_after_batch,
        continuation_job_id: continuation_job&.job_id
      }
    end
  end

app/jobs/enqueue_feed_auto_engagement_for_all_accounts_job.rb

0.0% lines covered

100.0% branches covered

77 relevant lines. 0 lines covered and 77 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Scheduler fan-out job for home-feed auto engagement.
  #
  # Loads one batch of InstagramAccount rows through ScheduledAccountBatching,
  # enqueues an AutoEngageHomeFeedJob for every account in the batch that has
  # cookies, and schedules a continuation of itself while the batch helper
  # reports that more accounts remain.
  class EnqueueFeedAutoEngagementForAllAccountsJob < ApplicationJob
    include ScheduledAccountBatching

    queue_as :sync

    DEFAULT_ACCOUNT_BATCH_SIZE = ENV.fetch("FEED_AUTO_ENGAGEMENT_ACCOUNT_BATCH_SIZE", "25").to_i.clamp(5, 120)
    CONTINUATION_WAIT_SECONDS = ENV.fetch("FEED_AUTO_ENGAGEMENT_CONTINUATION_WAIT_SECONDS", "3").to_i.clamp(1, 90)

    # @param opts [Hash, nil] optional positional options, handed to
    #   normalize_scheduler_params together with kwargs
    # @param kwargs [Hash] keyword options (max_posts, include_story,
    #   story_hold_seconds, batch_size, cursor_id)
    # @return [Hash] :enqueued_accounts, :scanned_accounts, :continuation_job_id
    def perform(opts = nil, **kwargs)
      defaults = {
        max_posts: 3,
        include_story: true,
        story_hold_seconds: 18,
        batch_size: DEFAULT_ACCOUNT_BATCH_SIZE,
        cursor_id: nil
      }
      params = normalize_scheduler_params(opts, kwargs, **defaults)

      # Sanitised per-account engagement settings, reused for the child jobs,
      # the continuation payload and the summary log entry.
      engagement = {
        max_posts: params[:max_posts].to_i.clamp(1, 10),
        include_story: ActiveModel::Type::Boolean.new.cast(params[:include_story]),
        story_hold_seconds: params[:story_hold_seconds].to_i.clamp(8, 40)
      }

      page = load_account_batch(
        scope: InstagramAccount.all,
        cursor_id: params[:cursor_id],
        batch_size: params[:batch_size]
      )
      accounts = page[:accounts]

      enqueued_count = 0
      accounts.each do |account|
        begin
          unless account.cookies.blank?
            AutoEngageHomeFeedJob.perform_later(instagram_account_id: account.id, **engagement)
            enqueued_count += 1
          end
        rescue StandardError => error
          # One failing account must not stop the rest of the batch.
          Ops::StructuredLogger.warn(
            event: "feed_auto_engagement.enqueue_failed",
            payload: {
              account_id: account.id,
              error_class: error.class.name,
              error_message: error.message
            }
          )
        end
      end

      follow_up = nil
      if page[:has_more]
        follow_up = schedule_account_batch_continuation!(
          wait_seconds: CONTINUATION_WAIT_SECONDS,
          payload: engagement.merge(batch_size: page[:batch_size], cursor_id: page[:next_cursor_id])
        )
      end

      Ops::StructuredLogger.info(
        event: "feed_auto_engagement.batch_enqueued",
        payload: {
          enqueued_accounts: enqueued_count,
          **engagement,
          batch_size: page[:batch_size],
          scanned_accounts: accounts.length,
          continuation_enqueued: follow_up.present?,
          continuation_job_id: follow_up&.job_id
        }
      )

      {
        enqueued_accounts: enqueued_count,
        scanned_accounts: accounts.length,
        continuation_job_id: follow_up&.job_id
      }
    end
  end

app/jobs/enqueue_follow_graph_sync_for_all_accounts_job.rb

0.0% lines covered

100.0% branches covered

38 relevant lines. 0 lines covered and 38 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Scheduler fan-out job for follow-graph syncs.
  #
  # Loads one batch of accounts that have a non-empty username, creates a
  # queued "follow_graph" sync run for each account that has cookies, enqueues
  # a SyncFollowGraphJob for it, and schedules a continuation of itself while
  # the batch helper reports that more accounts remain.
  class EnqueueFollowGraphSyncForAllAccountsJob < ApplicationJob
    include ScheduledAccountBatching

    queue_as :sync

    DEFAULT_ACCOUNT_BATCH_SIZE = ENV.fetch("FOLLOW_GRAPH_SYNC_ACCOUNT_BATCH_SIZE", "20").to_i.clamp(5, 120)
    CONTINUATION_WAIT_SECONDS = ENV.fetch("FOLLOW_GRAPH_SYNC_CONTINUATION_WAIT_SECONDS", "3").to_i.clamp(1, 90)

    # @param opts [Hash, nil] optional positional options, handed to
    #   normalize_scheduler_params together with kwargs
    # @param kwargs [Hash] keyword options (batch_size, cursor_id)
    # @return [Hash] :accounts_enqueued, :scanned_accounts, :continuation_job_id
    def perform(opts = nil, **kwargs)
      params = normalize_scheduler_params(opts, kwargs, batch_size: DEFAULT_ACCOUNT_BATCH_SIZE, cursor_id: nil)
      batch = load_account_batch(
        scope: InstagramAccount.where.not(username: [ nil, "" ]),
        cursor_id: params[:cursor_id],
        batch_size: params[:batch_size]
      )

      enqueued = 0
      batch[:accounts].each do |account|
        next if account.cookies.blank?

        run = account.sync_runs.create!(kind: "follow_graph", status: "queued")
        SyncFollowGraphJob.perform_later(instagram_account_id: account.id, sync_run_id: run.id)
        enqueued += 1
      rescue StandardError => e
        # Best-effort per account: keep going, but log explicitly. The error is
        # rescued here, so it never propagates out of this job on its own.
        # `run` is nil when creating the sync run failed; when it is set, the
        # run was created but its job could not be enqueued.
        # NOTE(review): such a run stays in "queued" status — confirm whether
        # it should be marked failed here.
        Ops::StructuredLogger.warn(
          event: "follow_graph_sync.enqueue_failed",
          payload: {
            account_id: account.id,
            sync_run_id: run&.id,
            error_class: e.class.name,
            error_message: e.message
          }
        )
      end

      continuation_job = nil
      if batch[:has_more]
        continuation_job = schedule_account_batch_continuation!(
          wait_seconds: CONTINUATION_WAIT_SECONDS,
          payload: {
            batch_size: batch[:batch_size],
            cursor_id: batch[:next_cursor_id]
          }
        )
      end

      {
        accounts_enqueued: enqueued,
        scanned_accounts: batch[:accounts].length,
        continuation_job_id: continuation_job&.job_id
      }
    end
  end

app/jobs/enqueue_profile_refresh_for_all_accounts_job.rb

0.0% lines covered

100.0% branches covered

45 relevant lines. 0 lines covered and 45 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Scheduler fan-out job for profile refreshes.
  #
  # Loads one batch of InstagramAccount rows, enqueues a
  # SyncNextProfilesForAccountJob for each account that has cookies, and
  # schedules a continuation of itself while the batch helper reports that
  # more accounts remain.
  class EnqueueProfileRefreshForAllAccountsJob < ApplicationJob
    include ScheduledAccountBatching

    queue_as :profiles

    DEFAULT_ACCOUNT_BATCH_SIZE = ENV.fetch("PROFILE_REFRESH_ACCOUNT_BATCH_SIZE", "20").to_i.clamp(5, 120)
    CONTINUATION_WAIT_SECONDS = ENV.fetch("PROFILE_REFRESH_CONTINUATION_WAIT_SECONDS", "3").to_i.clamp(1, 90)

    # @param opts [Hash, nil] optional positional options, handed to
    #   normalize_scheduler_params together with kwargs
    # @param kwargs [Hash] keyword options (limit_per_account, batch_size, cursor_id)
    # @return [Hash] :accounts_enqueued, :scanned_accounts, :continuation_job_id
    def perform(opts = nil, **kwargs)
      params = normalize_scheduler_params(
        opts,
        kwargs,
        limit_per_account: 30,
        batch_size: DEFAULT_ACCOUNT_BATCH_SIZE,
        cursor_id: nil
      )
      limit = params[:limit_per_account].to_i.clamp(1, 500)
      batch = load_account_batch(
        scope: InstagramAccount.all,
        cursor_id: params[:cursor_id],
        batch_size: params[:batch_size]
      )

      enqueued = 0
      batch[:accounts].each do |account|
        next if account.cookies.blank?

        SyncNextProfilesForAccountJob.perform_later(instagram_account_id: account.id, limit: limit)
        enqueued += 1
      rescue StandardError => e
        # Best-effort per account: continue with the rest of the batch, but
        # leave a trace instead of dropping the failure silently.
        Ops::StructuredLogger.warn(
          event: "profile_refresh.enqueue_failed",
          payload: {
            account_id: account.id,
            error_class: e.class.name,
            error_message: e.message
          }
        )
      end

      continuation_job = nil
      if batch[:has_more]
        continuation_job = schedule_account_batch_continuation!(
          wait_seconds: CONTINUATION_WAIT_SECONDS,
          payload: {
            limit_per_account: limit,
            batch_size: batch[:batch_size],
            cursor_id: batch[:next_cursor_id]
          }
        )
      end

      {
        accounts_enqueued: enqueued,
        scanned_accounts: batch[:accounts].length,
        continuation_job_id: continuation_job&.job_id
      }
    end
  end

app/jobs/enqueue_recent_profile_post_scans_for_account_job.rb

0.0% lines covered

100.0% branches covered

176 relevant lines. 0 lines covered and 176 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "set"
  # Picks profiles of one account for a recent-post scan and enqueues a
  # SyncRecentProfilePostsForProfileJob for each selected profile.
  #
  # Candidates are the account's profiles with `following` or `follows_you`
  # set. They are walked in id order starting after a cursor stored on the
  # account, ranked high/medium/low, filtered by a cooldown, and enqueued up to
  # `limit_per_account`. The cursor is stored back on the account afterwards.
  class EnqueueRecentProfilePostScansForAccountJob < ApplicationJob
    queue_as :post_downloads

    VISITED_TAG = SyncRecentProfilePostsForProfileJob::VISITED_TAG
    ANALYZED_TAG = SyncRecentProfilePostsForProfileJob::ANALYZED_TAG
    # Order in which candidates are considered for enqueueing.
    PRIORITY_LEVELS = %i[high medium low].freeze
    PROFILE_SCAN_COOLDOWN_SECONDS = ENV.fetch("PROFILE_SCAN_COOLDOWN_SECONDS", "1800").to_i.clamp(60, 12.hours.to_i)
    PROFILE_SCAN_REFRESH_INTERVAL_SECONDS = ENV.fetch("PROFILE_SCAN_REFRESH_INTERVAL_SECONDS", "4500").to_i.clamp(300, 12.hours.to_i)
    PROFILE_SCAN_ACTIVE_LOOKBACK_SECONDS = ENV.fetch("PROFILE_SCAN_ACTIVE_LOOKBACK_SECONDS", "7200").to_i.clamp(300, 24.hours.to_i)
    PROFILE_SCAN_INSPECTION_MULTIPLIER = ENV.fetch("PROFILE_SCAN_INSPECTION_MULTIPLIER", "8").to_i.clamp(2, 20)
    PROFILE_SCAN_MAX_INSPECTION = ENV.fetch("PROFILE_SCAN_MAX_INSPECTION", "320").to_i.clamp(30, 2000)

    # @param instagram_account_id [Integer] id of the InstagramAccount to process
    # @param limit_per_account [Integer] max profile jobs to enqueue (clamped to 1..30)
    # @param posts_limit [Integer] forwarded to the profile job (clamped to 1..3)
    # @param comments_limit [Integer] forwarded to the profile job (clamped to 1..20)
    # @raise [StandardError] any error outside the per-profile loop is logged
    #   and re-raised
    def perform(instagram_account_id:, limit_per_account: 8, posts_limit: 3, comments_limit: 8)
      account = InstagramAccount.find(instagram_account_id)
      return if account.cookies.blank?

      now = Time.current
      cap = limit_per_account.to_i.clamp(1, 30)
      posts_limit_i = posts_limit.to_i.clamp(1, 3)
      comments_limit_i = comments_limit.to_i.clamp(1, 20)

      selection = pick_profiles_for_scan(account: account, limit: cap, now: now)
      active_scans = active_profile_scan_profile_ids(
        account: account,
        profile_ids: selection[:candidate_profile_ids],
        now: now
      )

      enqueued = 0
      considered_profile_id = nil
      skipped = []

      selection[:ordered_candidates].each do |candidate|
        break if enqueued >= cap

        profile = candidate[:profile]
        priority = candidate[:priority].to_s
        considered_profile_id = profile.id

        skip_reason = skip_reason_for_profile_scan(profile: profile, active_scans: active_scans, now: now)
        if skip_reason.present?
          skipped << { profile_id: profile.id, priority: priority, reason: skip_reason }
          next
        end

        SyncRecentProfilePostsForProfileJob.perform_later(
          instagram_account_id: account.id,
          instagram_profile_id: profile.id,
          posts_limit: posts_limit_i,
          comments_limit: comments_limit_i
        )
        enqueued += 1
      rescue StandardError => e
        # `profile` may still be nil if the failure happened before it was
        # assigned, so every access in this handler is nil-safe; raising here
        # would abort the remaining candidates.
        skipped << { profile_id: profile&.id, priority: priority, reason: "enqueue_failed", error_class: e.class.name }
        Ops::StructuredLogger.warn(
          event: "profile_scan.enqueue_failed",
          payload: {
            account_id: account.id,
            profile_id: profile&.id,
            error_class: e.class.name,
            error_message: e.message
          }
        )
      end

      persist_scheduler_cursor!(
        account: account,
        cursor_id: considered_profile_id || selection[:cursor_end_id],
        now: now
      )

      Ops::StructuredLogger.info(
        event: "profile_scan.account_batch_enqueued",
        payload: {
          account_id: account.id,
          candidate_profiles: selection[:candidate_profile_ids].length,
          selected_profiles: selection[:ordered_candidates].length,
          enqueued_jobs: enqueued,
          skipped_profiles: skipped.length,
          skipped_reasons: skipped.group_by { |row| row[:reason] }.transform_values(&:length),
          cursor_start_id: selection[:cursor_start_id],
          cursor_end_id: considered_profile_id || selection[:cursor_end_id],
          priority_counts: selection[:priority_counts],
          limit_per_account: cap,
          posts_limit: posts_limit_i,
          comments_limit: comments_limit_i,
          profile_scan_cooldown_seconds: PROFILE_SCAN_COOLDOWN_SECONDS,
          scan_refresh_interval_seconds: PROFILE_SCAN_REFRESH_INTERVAL_SECONDS
        }
      )
    rescue StandardError => e
      Ops::StructuredLogger.error(
        event: "profile_scan.account_batch_failed",
        payload: {
          account_id: instagram_account_id,
          error_class: e.class.name,
          error_message: e.message
        }
      )
      raise
    end

    private

    # Builds the scan selection for an account.
    #
    # @return [Hash] :candidate_profile_ids (eligible profile ids),
    #   :ordered_candidates (array of { profile:, priority: } sorted by
    #   PRIORITY_LEVELS), :cursor_start_id, :cursor_end_id, :priority_counts
    def pick_profiles_for_scan(account:, limit:, now:)
      candidate_ids = account.instagram_profiles
        .where("following = ? OR follows_you = ?", true, true)
        .order(:id)
        .pluck(:id)

      if candidate_ids.empty?
        return {
          candidate_profile_ids: [],
          ordered_candidates: [],
          cursor_start_id: account.continuous_processing_profile_scan_cursor_id,
          cursor_end_id: account.continuous_processing_profile_scan_cursor_id,
          priority_counts: {}
        }
      end

      cursor_start_id = account.continuous_processing_profile_scan_cursor_id
      rotated_ids = rotate_ids(ids: candidate_ids, cursor_id: cursor_start_id)

      # Inspect more profiles than will be enqueued, bounded by the configured
      # maximum and by the number of candidates.
      inspection_count = [ [ limit * PROFILE_SCAN_INSPECTION_MULTIPLIER, limit ].max, PROFILE_SCAN_MAX_INSPECTION, rotated_ids.length ].min
      inspection_ids = rotated_ids.first(inspection_count)

      profile_by_id = account.instagram_profiles
        .where(id: inspection_ids)
        .includes(:profile_tags)
        .to_a
        .index_by(&:id)

      # Re-map through inspection_ids so the rotated order is kept.
      inspected_profiles = inspection_ids.filter_map { |id| profile_by_id[id] }
      eligible_profiles = inspected_profiles.reject { |profile| Instagram::ProfileScanPolicy.skip_from_cached_profile?(profile: profile) }
      weighted = eligible_profiles.map { |profile| { profile: profile, priority: scan_priority_for(profile: profile, now: now) } }
      ordered_candidates = PRIORITY_LEVELS.flat_map do |priority|
        weighted.select { |row| row[:priority] == priority }
      end

      {
        candidate_profile_ids: eligible_profiles.map(&:id),
        ordered_candidates: ordered_candidates,
        cursor_start_id: cursor_start_id,
        cursor_end_id: inspection_ids.last,
        priority_counts: weighted.group_by { |row| row[:priority] }.transform_values(&:size)
      }
    end

    # Decides whether a profile should be skipped in this run.
    #
    # @return [String, nil] "already_queued_or_running" when the profile id is
    #   in active_scans; nil when the profile was never scanned, when the last
    #   scan is at least the cooldown old, or when post/story activity is newer
    #   than the last scan; "cooldown_active" otherwise
    def skip_reason_for_profile_scan(profile:, active_scans:, now:)
      return "already_queued_or_running" if active_scans.include?(profile.id)

      last_scan_at = profile.ai_last_analyzed_at || profile.last_synced_at
      return nil if last_scan_at.blank?
      return nil if last_scan_at <= (now - PROFILE_SCAN_COOLDOWN_SECONDS.seconds)

      last_activity_at = [ profile.last_post_at, profile.last_story_seen_at ].compact.max
      return nil if last_activity_at.present? && last_activity_at > last_scan_at

      "cooldown_active"
    end

    # Ranks a profile for scanning.
    #
    # @return [Symbol] :high when never scanned, when the visited or analyzed
    #   tag is missing, or when activity is newer than the last scan; :medium
    #   when the last scan is at least the refresh interval old; :low otherwise
    def scan_priority_for(profile:, now:)
      last_scan_at = profile.ai_last_analyzed_at || profile.last_synced_at
      last_activity_at = [ profile.last_post_at, profile.last_story_seen_at ].compact.max
      tag_names = profile.profile_tags.map { |tag| tag.name.to_s }
      unseen = !tag_names.include?(VISITED_TAG) || !tag_names.include?(ANALYZED_TAG)

      return :high if last_scan_at.blank?
      return :high if unseen
      return :high if last_activity_at.present? && last_activity_at > last_scan_at
      return :medium if last_scan_at <= (now - PROFILE_SCAN_REFRESH_INTERVAL_SECONDS.seconds)

      :low
    end

    # Profile ids that have a queued or running "analyze_profile" action log
    # within the lookback window.
    #
    # @return [Set<Integer>]
    def active_profile_scan_profile_ids(account:, profile_ids:, now:)
      return Set.new if profile_ids.empty?

      lookback = now - PROFILE_SCAN_ACTIVE_LOOKBACK_SECONDS.seconds
      ids = account.instagram_profile_action_logs
        .where(action: "analyze_profile", status: %w[queued running], instagram_profile_id: profile_ids)
        .where("occurred_at >= ?", lookback)
        .distinct
        .pluck(:instagram_profile_id)
      ids.to_set
    end

    # Rotates `ids` so iteration resumes just after the cursor.
    #
    # `ids` is expected in ascending order (the caller plucks with order(:id)).
    # Resuming at the first id greater than the cursor gives the same result as
    # rotating after the cursor's own position when the cursor is present, and
    # still resumes at the right place when the cursor profile is no longer a
    # candidate (previously the walk restarted from the beginning).
    #
    # @return [Array<Integer>] rotated ids, or `ids` unchanged when there is no
    #   cursor or nothing lies past it
    def rotate_ids(ids:, cursor_id:)
      return ids if ids.empty? || cursor_id.blank?

      cursor = cursor_id.to_i
      start = ids.index { |id| id.to_i > cursor }
      return ids unless start

      ids.rotate(start)
    end

    # Stores the scan cursor and enqueue timestamp on the account with
    # update_columns. Best-effort: a failure is logged and nil is returned so
    # the surrounding run is not failed by it.
    def persist_scheduler_cursor!(account:, cursor_id:, now:)
      updates = {
        continuous_processing_last_profile_scan_enqueued_at: now,
        updated_at: Time.current
      }
      updates[:continuous_processing_profile_scan_cursor_id] = cursor_id.to_i if cursor_id.present?
      account.update_columns(updates)
    rescue StandardError => e
      Ops::StructuredLogger.warn(
        event: "profile_scan.cursor_persist_failed",
        payload: {
          account_id: account&.id,
          cursor_id: cursor_id,
          error_class: e.class.name,
          error_message: e.message
        }
      )
      nil
    end
  end

app/jobs/enqueue_recent_profile_post_scans_for_all_accounts_job.rb

0.0% lines covered

100.0% branches covered

77 relevant lines. 0 lines covered and 77 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Scheduler fan-out job for recent profile-post scans.
  #
  # Loads one batch of InstagramAccount rows, enqueues an
  # EnqueueRecentProfilePostScansForAccountJob for each account that has
  # cookies, and schedules a continuation of itself while the batch helper
  # reports that more accounts remain.
  class EnqueueRecentProfilePostScansForAllAccountsJob < ApplicationJob
    include ScheduledAccountBatching

    queue_as :post_downloads

    DEFAULT_ACCOUNT_BATCH_SIZE = ENV.fetch("PROFILE_SCAN_ACCOUNT_BATCH_SIZE", "25").to_i.clamp(5, 120)
    CONTINUATION_WAIT_SECONDS = ENV.fetch("PROFILE_SCAN_CONTINUATION_WAIT_SECONDS", "3").to_i.clamp(1, 90)

    # Accepts either a single hash (e.g. from Sidekiq cron/schedule) or keyword
    # args from perform_later(...).
    #
    # @param opts [Hash, nil] optional positional options
    # @param kwargs [Hash] keyword options (limit_per_account, posts_limit,
    #   comments_limit, batch_size, cursor_id)
    # @return [Hash] :accounts_enqueued, :scanned_accounts, :continuation_job_id
    def perform(opts = nil, **kwargs)
      defaults = {
        limit_per_account: 8,
        posts_limit: 3,
        comments_limit: 8,
        batch_size: DEFAULT_ACCOUNT_BATCH_SIZE,
        cursor_id: nil
      }
      params = normalize_scheduler_params(opts, kwargs, **defaults)

      # Sanitised scan limits, reused for the child jobs, the continuation
      # payload and the summary log entry.
      scan_limits = {
        limit_per_account: params[:limit_per_account].to_i.clamp(1, 30),
        posts_limit: params[:posts_limit].to_i.clamp(1, 3),
        comments_limit: params[:comments_limit].to_i.clamp(1, 20)
      }

      page = load_account_batch(
        scope: InstagramAccount.all,
        cursor_id: params[:cursor_id],
        batch_size: params[:batch_size]
      )
      accounts = page[:accounts]

      enqueued_count = 0
      accounts.each do |account|
        begin
          unless account.cookies.blank?
            EnqueueRecentProfilePostScansForAccountJob.perform_later(instagram_account_id: account.id, **scan_limits)
            enqueued_count += 1
          end
        rescue StandardError => error
          # One failing account must not stop the rest of the batch.
          Ops::StructuredLogger.warn(
            event: "profile_scan.all_accounts_enqueue_failed",
            payload: {
              account_id: account.id,
              error_class: error.class.name,
              error_message: error.message
            }
          )
        end
      end

      follow_up = nil
      if page[:has_more]
        follow_up = schedule_account_batch_continuation!(
          wait_seconds: CONTINUATION_WAIT_SECONDS,
          payload: scan_limits.merge(batch_size: page[:batch_size], cursor_id: page[:next_cursor_id])
        )
      end

      Ops::StructuredLogger.info(
        event: "profile_scan.all_accounts_batch_enqueued",
        payload: {
          accounts_enqueued: enqueued_count,
          scanned_accounts: accounts.length,
          **scan_limits,
          batch_size: page[:batch_size],
          continuation_enqueued: follow_up.present?,
          continuation_job_id: follow_up&.job_id
        }
      )

      {
        accounts_enqueued: enqueued_count,
        scanned_accounts: accounts.length,
        continuation_job_id: follow_up&.job_id
      }
    end
  end

app/jobs/enqueue_story_auto_replies_for_all_accounts_job.rb

0.0% lines covered

100.0% branches covered

79 relevant lines. 0 lines covered and 79 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Scheduler fan-out job for story auto-replies.
  #
  # Loads one batch of InstagramAccount rows, enqueues a
  # SyncProfileStoriesForAccountJob (with comments and the auto-reply tag
  # requirement switched on) for each account that has cookies, and schedules
  # a continuation of itself while the batch helper reports more accounts.
  class EnqueueStoryAutoRepliesForAllAccountsJob < ApplicationJob
    include ScheduledAccountBatching

    queue_as :story_downloads

    DEFAULT_ACCOUNT_BATCH_SIZE = ENV.fetch("STORY_AUTO_REPLY_ACCOUNT_BATCH_SIZE", "20").to_i.clamp(5, 120)
    CONTINUATION_WAIT_SECONDS = ENV.fetch("STORY_AUTO_REPLY_CONTINUATION_WAIT_SECONDS", "3").to_i.clamp(1, 90)

    # @param opts [Hash, nil] optional positional options, handed to
    #   normalize_scheduler_params together with kwargs
    # @param kwargs [Hash] keyword options (max_stories, force_analyze_all,
    #   profile_limit, batch_size, cursor_id)
    # @return [Hash] :enqueued_accounts, :scanned_accounts, :continuation_job_id
    def perform(opts = nil, **kwargs)
      defaults = {
        max_stories: 10,
        force_analyze_all: false,
        profile_limit: SyncProfileStoriesForAccountJob::STORY_BATCH_LIMIT,
        batch_size: DEFAULT_ACCOUNT_BATCH_SIZE,
        cursor_id: nil
      }
      params = normalize_scheduler_params(opts, kwargs, **defaults)

      # Sanitised settings, keyed the way the continuation payload and the
      # summary log entry expect them.
      tuning = {
        max_stories: params[:max_stories].to_i.clamp(1, 10),
        force_analyze_all: ActiveModel::Type::Boolean.new.cast(params[:force_analyze_all]),
        profile_limit: params[:profile_limit].to_i.clamp(1, SyncProfileStoriesForAccountJob::STORY_BATCH_LIMIT)
      }
      # The same settings under the keyword names the per-account job takes.
      sync_args = {
        story_limit: tuning[:profile_limit],
        stories_per_profile: tuning[:max_stories],
        with_comments: true,
        require_auto_reply_tag: true,
        force_analyze_all: tuning[:force_analyze_all]
      }

      page = load_account_batch(
        scope: InstagramAccount.all,
        cursor_id: params[:cursor_id],
        batch_size: params[:batch_size]
      )
      accounts = page[:accounts]

      enqueued_count = 0
      accounts.each do |account|
        begin
          unless account.cookies.blank?
            SyncProfileStoriesForAccountJob.perform_later(instagram_account_id: account.id, **sync_args)
            enqueued_count += 1
          end
        rescue StandardError => error
          # One failing account must not stop the rest of the batch.
          Ops::StructuredLogger.warn(
            event: "story_auto_reply.enqueue_failed",
            payload: {
              account_id: account.id,
              error_class: error.class.name,
              error_message: error.message
            }
          )
        end
      end

      follow_up = nil
      if page[:has_more]
        follow_up = schedule_account_batch_continuation!(
          wait_seconds: CONTINUATION_WAIT_SECONDS,
          payload: tuning.merge(batch_size: page[:batch_size], cursor_id: page[:next_cursor_id])
        )
      end

      Ops::StructuredLogger.info(
        event: "story_auto_reply.batch_enqueued",
        payload: {
          enqueued_accounts: enqueued_count,
          scanned_accounts: accounts.length,
          **tuning,
          batch_size: page[:batch_size],
          continuation_enqueued: follow_up.present?,
          continuation_job_id: follow_up&.job_id
        }
      )

      {
        enqueued_accounts: enqueued_count,
        scanned_accounts: accounts.length,
        continuation_job_id: follow_up&.job_id
      }
    end
  end

app/jobs/fetch_instagram_profile_details_job.rb

0.0% lines covered

100.0% branches covered

129 relevant lines. 0 lines covered and 129 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "digest"
  2. require "uri"
  3. require "cgi"
  # Fetches profile details for one Instagram profile through
  # Instagram::Client, updates the profile record, records a "post_detected"
  # event when a newer post timestamp appears, queues an avatar download when
  # the avatar URL fingerprint changed, and tracks progress in a profile
  # action log.
  class FetchInstagramProfileDetailsJob < ApplicationJob
    queue_as :profiles

    # @param instagram_account_id [Integer] id of the owning InstagramAccount
    # @param instagram_profile_id [Integer] id of the profile, looked up within
    #   the account's profiles
    # @param profile_action_log_id [Integer, nil] existing action log to reuse;
    #   a new one is created when absent or not found
    # @raise [StandardError] any failure is reported (notification + action
    #   log) and then re-raised
    def perform(instagram_account_id:, instagram_profile_id:, profile_action_log_id: nil)
      account = InstagramAccount.find(instagram_account_id)
      profile = account.instagram_profiles.find(instagram_profile_id)
      action_log = find_or_create_action_log(
        account: account,
        profile: profile,
        action: "fetch_profile_details",
        profile_action_log_id: profile_action_log_id
      )
      action_log.mark_running!(extra_metadata: { queue_name: queue_name, active_job_id: job_id })

      details = Instagram::Client.new(account: account).fetch_profile_details_and_verify_messageability!(username: profile.username)
      normalized_pic_url = Instagram::AvatarUrlNormalizer.normalize(details[:profile_pic_url])
      followers_count = normalize_count(details[:followers_count])
      prev_last_post_at = profile.last_post_at

      # Blank values in the fetched details never overwrite stored values for
      # the descriptive fields; the messaging fields are always overwritten.
      profile.update!(
        display_name: details[:display_name].presence || profile.display_name,
        profile_pic_url: normalized_pic_url.presence || profile.profile_pic_url,
        ig_user_id: details[:ig_user_id].presence || profile.ig_user_id,
        bio: details[:bio].presence || profile.bio,
        followers_count: followers_count || profile.followers_count,
        can_message: details[:can_message],
        restriction_reason: details[:restriction_reason],
        dm_interaction_state: details[:dm_state].to_s.presence || (details[:can_message] ? "messageable" : "unavailable"),
        dm_interaction_reason: details[:dm_reason].to_s.presence || details[:restriction_reason].to_s,
        dm_interaction_checked_at: Time.current,
        dm_interaction_retry_after_at: details[:dm_retry_after_at],
        last_post_at: details[:last_post_at].presence || profile.last_post_at
      )
      profile.recompute_last_active!
      profile.save!
      apply_scan_exclusion_tag!(profile: profile, details: details)

      # Record post activity (best-effort from API profile payload).
      if profile.last_post_at.present? && (prev_last_post_at.nil? || profile.last_post_at > prev_last_post_at)
        eid =
          details[:latest_post_shortcode].presence ||
          "post:#{profile.last_post_at.to_i}"
        profile.record_event!(
          kind: "post_detected",
          external_id: eid,
          occurred_at: profile.last_post_at,
          metadata: { source: "profile_page" }
        )
      end

      # If avatar URL changed (or we never downloaded an attachment), refresh in the background.
      new_url = Instagram::AvatarUrlNormalizer.normalize(profile.profile_pic_url)
      if new_url.present? && (profile.avatar.blank? || avatar_fp(new_url) != profile.avatar_url_fingerprint.to_s)
        avatar_log = profile.instagram_profile_action_logs.create!(
          instagram_account: account,
          action: "sync_avatar",
          status: "queued",
          trigger_source: "job",
          occurred_at: Time.current,
          metadata: { triggered_by: self.class.name, reason: "profile_pic_changed" }
        )
        avatar_job = DownloadInstagramProfileAvatarJob.perform_later(
          instagram_account_id: account.id,
          instagram_profile_id: profile.id,
          force: false,
          profile_action_log_id: avatar_log.id
        )
        avatar_log.update!(active_job_id: avatar_job.job_id, queue_name: avatar_job.queue_name)
      elsif new_url.blank? && profile.profile_pic_url.present?
        # The stored URL does not normalize to anything usable: clear it and
        # record that the avatar is missing.
        profile.update!(profile_pic_url: nil, avatar_url_fingerprint: nil, avatar_synced_at: Time.current)
        profile.record_event!(kind: "avatar_missing", external_id: "avatar_missing:#{Time.current.utc.to_date.iso8601}", metadata: { source: "profile_page" })
      end

      Turbo::StreamsChannel.broadcast_append_to(
        account,
        target: "notifications",
        partial: "shared/notification",
        locals: { kind: "notice", message: "Fetched profile details for #{profile.username}." }
      )
      action_log.mark_succeeded!(
        extra_metadata: { can_message: profile.can_message, last_post_at: profile.last_post_at&.iso8601 },
        log_text: "Fetched profile details and updated profile attributes"
      )
    rescue StandardError => e
      # `account` is still nil when InstagramAccount.find itself raised; in
      # that case there is no account stream to notify, so skip the broadcast
      # instead of sending it to a nil streamable.
      if account
        Turbo::StreamsChannel.broadcast_append_to(
          account,
          target: "notifications",
          partial: "shared/notification",
          locals: { kind: "alert", message: "Profile fetch failed: #{e.message}" }
        )
      end
      action_log&.mark_failed!(error_message: e.message, extra_metadata: { active_job_id: job_id })
      raise
    end

    private

    # Returns the profile's action log with the given id when one is passed
    # and found; otherwise creates a new queued log for `action`.
    def find_or_create_action_log(account:, profile:, action:, profile_action_log_id:)
      log = profile.instagram_profile_action_logs.find_by(id: profile_action_log_id) if profile_action_log_id.present?
      return log if log

      profile.instagram_profile_action_logs.create!(
        instagram_account: account,
        action: action,
        status: "queued",
        trigger_source: "job",
        occurred_at: Time.current,
        active_job_id: job_id,
        queue_name: queue_name,
        metadata: { created_by: self.class.name }
      )
    end

    # SHA-256 hex digest of the URL's host and path (query string ignored)
    # after HTML-unescaping. Falls back to hashing the whole string when the
    # URL cannot be processed.
    def avatar_fp(url)
      url = CGI.unescapeHTML(url.to_s)
      uri = URI.parse(url)
      base = "#{uri.host}#{uri.path}"
      Digest::SHA256.hexdigest(base)
    rescue StandardError
      Digest::SHA256.hexdigest(url.to_s)
    end

    # Converts a value to an Integer only when its stripped string form is all
    # digits; returns nil for anything else (including formatted counts such
    # as "1,234").
    def normalize_count(value)
      text = value.to_s.strip
      return nil unless text.match?(/\A\d+\z/)

      text.to_i
    rescue StandardError
      nil
    end

    # Applies the scan policy decision to the profile: marks it scan-excluded
    # for the "non_personal_profile_page" and "scan_excluded_tag" reason codes,
    # otherwise clears the exclusion unless the decision says to skip the scan.
    # Best-effort: any error is swallowed and nil is returned.
    def apply_scan_exclusion_tag!(profile:, details:)
      decision = Instagram::ProfileScanPolicy.new(profile: profile, profile_details: details).decision
      if decision[:reason_code].to_s == "non_personal_profile_page" || decision[:reason_code].to_s == "scan_excluded_tag"
        Instagram::ProfileScanPolicy.mark_scan_excluded!(profile: profile)
        return
      end

      Instagram::ProfileScanPolicy.clear_scan_excluded!(profile: profile) unless decision[:skip_scan]
    rescue StandardError
      nil
    end
  end

app/jobs/finalize_post_analysis_pipeline_job.rb

0.0% lines covered

100.0% branches covered

308 relevant lines. 0 lines covered and 308 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. class FinalizePostAnalysisPipelineJob < PostAnalysisPipelineJob
  2. queue_as :ai_visual_queue
  3. MAX_FINALIZE_ATTEMPTS = ENV.fetch("AI_PIPELINE_FINALIZE_ATTEMPTS", 30).to_i.clamp(5, 120)
  4. FINALIZER_LOCK_SECONDS = ENV.fetch("AI_PIPELINE_FINALIZER_LOCK_SECONDS", 4).to_i.clamp(2, 30)
  5. STEP_STALL_TIMEOUT_SECONDS = ENV.fetch("AI_PIPELINE_STEP_STALL_TIMEOUT_SECONDS", 180).to_i.clamp(45, 1800)
  6. def perform(instagram_account_id:, instagram_profile_id:, instagram_profile_post_id:, pipeline_run_id:, attempts: 0)
  7. context = load_pipeline_context!(
  8. instagram_account_id: instagram_account_id,
  9. instagram_profile_id: instagram_profile_id,
  10. instagram_profile_post_id: instagram_profile_post_id,
  11. pipeline_run_id: pipeline_run_id
  12. )
  13. return unless context
  14. account = context[:account]
  15. profile = context[:profile]
  16. post = context[:post]
  17. pipeline_state = context[:pipeline_state]
  18. if pipeline_state.pipeline_terminal?(run_id: pipeline_run_id)
  19. Ops::StructuredLogger.info(
  20. event: "ai.pipeline.finalizer.skipped_terminal",
  21. payload: {
  22. active_job_id: job_id,
  23. instagram_account_id: account.id,
  24. instagram_profile_id: profile.id,
  25. instagram_profile_post_id: post.id,
  26. pipeline_run_id: pipeline_run_id
  27. }
  28. )
  29. return
  30. end
  31. return unless acquire_finalizer_slot?(post: post, pipeline_run_id: pipeline_run_id, attempts: attempts)
  32. maybe_enqueue_metadata_step!(context: context, pipeline_run_id: pipeline_run_id)
  33. mark_stalled_steps_failed!(context: context, pipeline_run_id: pipeline_run_id)
  34. unless pipeline_state.all_required_steps_terminal?(run_id: pipeline_run_id)
  35. if attempts.to_i >= MAX_FINALIZE_ATTEMPTS
  36. finalize_as_failed!(
  37. post: post,
  38. pipeline_state: pipeline_state,
  39. pipeline_run_id: pipeline_run_id,
  40. reason: "pipeline_timeout"
  41. )
  42. return
  43. end
  44. wait_seconds = finalize_poll_delay_seconds(attempts: attempts)
  45. self.class.set(wait: wait_seconds.seconds).perform_later(
  46. instagram_account_id: account.id,
  47. instagram_profile_id: profile.id,
  48. instagram_profile_post_id: post.id,
  49. pipeline_run_id: pipeline_run_id,
  50. attempts: attempts.to_i + 1
  51. )
  52. return
  53. end
  54. pipeline = pipeline_state.pipeline_for(run_id: pipeline_run_id)
  55. required_steps = Array(pipeline["required_steps"]).map(&:to_s)
  56. visual_status = pipeline.dig("steps", "visual", "status").to_s
  57. succeeded_steps = required_steps.select do |step|
  58. pipeline.dig("steps", step, "status").to_s == "succeeded"
  59. end
  60. overall_status =
  61. if required_steps.include?("visual")
  62. visual_status == "succeeded" ? "completed" : "failed"
  63. else
  64. succeeded_steps.any? ? "completed" : "failed"
  65. end
  66. finalize_post_record!(post: post, pipeline: pipeline, overall_status: overall_status)
  67. pipeline_state.mark_pipeline_finished!(
  68. run_id: pipeline_run_id,
  69. status: overall_status,
  70. details: {
  71. finalized_by: self.class.name,
  72. finalized_at: Time.current.iso8601(3),
  73. attempts: attempts.to_i,
  74. visual_status: visual_status
  75. }
  76. )
  77. notification_kind = overall_status == "completed" ? "notice" : "alert"
  78. notification_message =
  79. if overall_status == "completed"
  80. "Profile post analyzed: #{post.shortcode}."
  81. else
  82. "Profile post analysis degraded/failed for #{post.shortcode}."
  83. end
  84. Turbo::StreamsChannel.broadcast_append_to(
  85. account,
  86. target: "notifications",
  87. partial: "shared/notification",
  88. locals: { kind: notification_kind, message: notification_message }
  89. )
  90. rescue StandardError => e
  91. finalize_as_failed!(
  92. post: context&.dig(:post),
  93. pipeline_state: context&.dig(:pipeline_state),
  94. pipeline_run_id: pipeline_run_id,
  95. reason: format_error(e)
  96. )
  97. raise
  98. end
  99. private
  # Tries to claim the short-lived finalizer lease stored under
  # metadata["ai_pipeline"]["finalizer"] of the post.
  #
  # @param post [Object] record responding to with_lock, metadata and update!
  # @param pipeline_run_id [#to_s] run the caller wants to finalize
  # @param attempts [#to_i] attempt counter recorded on the lease
  # @return [Boolean] true when the lease was written; false when the stored
  #   pipeline is missing or belongs to another run, or when an unexpired
  #   lease is already present. Any StandardError raised along the way also
  #   yields true, i.e. the guard fails open.
  def acquire_finalizer_slot?(post:, pipeline_run_id:, attempts:)
    started_at = Time.current
    slot_claimed = false

    # Every pipeline step enqueues a finalizer; this short lock serializes metadata writes.
    post.with_lock do
      snapshot = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
      run = snapshot["ai_pipeline"]
      same_run = run.is_a?(Hash) && run["run_id"].to_s == pipeline_run_id.to_s
      next unless same_run

      lease = run["finalizer"].is_a?(Hash) ? run["finalizer"] : {}
      held_until = parse_time(lease["lock_until"])
      # Someone else still holds an unexpired lease.
      next if held_until.present? && held_until > started_at

      lease["lock_until"] = (started_at + FINALIZER_LOCK_SECONDS.seconds).iso8601(3)
      lease["last_started_at"] = started_at.iso8601(3)
      lease["last_job_id"] = job_id
      lease["last_attempt"] = attempts.to_i
      run["finalizer"] = lease
      snapshot["ai_pipeline"] = run
      post.update!(metadata: snapshot)
      slot_claimed = true
    end

    slot_claimed
  rescue StandardError
    true
  end
  # Maps an attempt count to a delay in seconds.
  #
  # Tiers: fewer than 3 attempts => 5, fewer than 8 => 10, fewer than 14 => 15,
  # fewer than 20 => 20, anything else => 30.
  #
  # @param attempts [#to_i] attempt counter (nil counts as 0)
  # @return [Integer] delay in seconds
  def finalize_poll_delay_seconds(attempts:)
    attempt_count = attempts.to_i
    # Each pair is [exclusive upper bound on attempts, delay].
    delay_tiers = [[3, 5], [8, 10], [14, 15], [20, 20]]
    tier = delay_tiers.find { |upper_bound, _delay| attempt_count < upper_bound }
    tier ? tier.last : 30
  end
  # Parses a timestamp using Time.zone.
  #
  # @param value [#to_s, nil] raw timestamp
  # @return [Object, nil] whatever Time.zone.parse returns, or nil when the
  #   input is blank or any StandardError is raised while parsing
  def parse_time(value)
    raw = value.to_s
    return nil if raw.blank?

    Time.zone.parse(raw)
  rescue StandardError
    nil
  end
  # Enqueues ProcessPostMetadataTaggingJob once the "metadata" step is still
  # pending and the core steps are terminal, then records the step as queued.
  #
  # If anything in that sequence raises a StandardError, the "metadata" step is
  # recorded as failed with reason "metadata_enqueue_failed" instead of
  # propagating the error.
  #
  # @param context [Hash] expects :pipeline_state, :account, :profile and :post
  # @param pipeline_run_id [Object] identifier of the run being processed
  # @return [Object, nil] nil when nothing had to be enqueued, otherwise the
  #   result of the pipeline-state call that recorded the outcome
  def maybe_enqueue_metadata_step!(context:, pipeline_run_id:)
    state = context[:pipeline_state]
    metadata_pending = state.required_step_pending?(run_id: pipeline_run_id, step: "metadata")
    # Metadata tagging depends on outputs from core extraction steps.
    return unless metadata_pending && state.core_steps_terminal?(run_id: pipeline_run_id)

    job_arguments = {
      instagram_account_id: context[:account].id,
      instagram_profile_id: context[:profile].id,
      instagram_profile_post_id: context[:post].id,
      pipeline_run_id: pipeline_run_id
    }
    enqueued_job = ProcessPostMetadataTaggingJob.perform_later(**job_arguments)

    enqueue_details = { enqueued_by: self.class.name, enqueued_at: Time.current.iso8601(3) }
    state.mark_step_queued!(
      run_id: pipeline_run_id,
      step: "metadata",
      queue_name: enqueued_job.queue_name,
      active_job_id: enqueued_job.job_id,
      result: enqueue_details
    )
  rescue StandardError => e
    state.mark_step_completed!(
      run_id: pipeline_run_id,
      step: "metadata",
      status: "failed",
      error: format_error(e),
      result: { reason: "metadata_enqueue_failed" }
    )
  end
  # Folds OCR and video-processing metadata into the post's analysis hash,
  # stores the given pipeline snapshot under metadata["ai_pipeline"] and
  # persists the final ai_status.
  #
  # @param post [Object] record responding to analysis, metadata and update!
  # @param pipeline [Object] pipeline snapshot written to metadata["ai_pipeline"]
  # @param overall_status [String] "completed" marks the post "analyzed";
  #   any other value marks it "failed" and clears analyzed_at
  # @return [Object] result of post.update!
  def finalize_post_record!(post:, pipeline:, overall_status:)
    hash_copy = ->(value) { value.is_a?(Hash) ? value.deep_dup : {} }
    merged_analysis = hash_copy.call(post.analysis)
    merged_metadata = hash_copy.call(post.metadata)

    ocr = merged_metadata["ocr_analysis"]
    ocr = {} unless ocr.is_a?(Hash)
    if ocr["ocr_text"].to_s.present?
      merged_analysis["ocr_text"] = ocr["ocr_text"]
      merged_analysis["ocr_blocks"] = Array(ocr["ocr_blocks"]).first(40)
    end

    video = merged_metadata["video_processing"]
    video = {} unless video.is_a?(Hash)
    if video.present?
      # Copies a value as a string, but only when it is not blank.
      copy_text = lambda do |target_key, source_key|
        text = video[source_key].to_s
        merged_analysis[target_key] = text if text.present?
      end

      copy_text.call("video_processing_mode", "processing_mode")
      if video.key?("static")
        merged_analysis["video_static_detected"] = ActiveModel::Type::Boolean.new.cast(video["static"])
      end
      copy_text.call("video_semantic_route", "semantic_route")
      merged_analysis["video_duration_seconds"] = video["duration_seconds"] if video.key?("duration_seconds")
      copy_text.call("video_context_summary", "context_summary")
      copy_text.call("transcript", "transcript")

      merged_analysis["video_topics"] = normalize_string_array(video["topics"], limit: 40)
      merged_analysis["video_objects"] = normalize_string_array(video["objects"], limit: 50)
      merged_analysis["video_scenes"] = Array(video["scenes"]).grep(Hash).first(50)
      {
        "video_hashtags" => "hashtags",
        "video_mentions" => "mentions",
        "video_profile_handles" => "profile_handles"
      }.each do |target_key, source_key|
        merged_analysis[target_key] = normalize_string_array(video[source_key], limit: 50)
      end

      # Video-derived values are appended to the general-purpose lists as well.
      { "topics" => 40, "objects" => 50, "hashtags" => 50, "mentions" => 50 }.each do |key, cap|
        merged_analysis[key] = merge_string_array(merged_analysis[key], video[key], limit: cap)
      end

      # Video OCR output is only a fallback for missing OCR data.
      if merged_analysis["ocr_text"].to_s.blank? && video["ocr_text"].to_s.present?
        merged_analysis["ocr_text"] = video["ocr_text"].to_s
      end
      if Array(merged_analysis["ocr_blocks"]).empty?
        merged_analysis["ocr_blocks"] = Array(video["ocr_blocks"]).grep(Hash).first(40)
      end
    end

    merged_metadata["ai_pipeline"] = pipeline
    completed = overall_status == "completed"
    merged_metadata.delete("ai_pipeline_failure") if completed

    post.update!(
      analysis: merged_analysis,
      metadata: merged_metadata,
      ai_status: completed ? "analyzed" : "failed",
      analyzed_at: completed ? Time.current : nil
    )
  end
  # Marks required steps as failed when they have been "queued" or "running"
  # for at least STEP_STALL_TIMEOUT_SECONDS, and logs a warning for each one.
  #
  # Steps whose age cannot be determined are left untouched. Any StandardError
  # is swallowed and turns the result into nil.
  #
  # @param context [Hash] expects :pipeline_state, :account, :profile and :post
  # @param pipeline_run_id [Object] identifier of the run being inspected
  # @return [Array<String>, nil] the required step names that were inspected,
  #   or nil when there was nothing to inspect or an error occurred
  def mark_stalled_steps_failed!(context:, pipeline_run_id:)
    state = context[:pipeline_state]
    run = state.pipeline_for(run_id: pipeline_run_id)
    return unless run.is_a?(Hash)

    step_names = Array(run["required_steps"]).map(&:to_s)
    return if step_names.empty?

    checked_at = Time.current
    step_names.each do |step_name|
      step_row = run.dig("steps", step_name)
      next unless step_row.is_a?(Hash)

      previous_status = step_row["status"].to_s
      next unless %w[queued running].include?(previous_status)

      age = step_age_seconds(step_row: step_row, pipeline: run, now: checked_at)
      next if !age || age < STEP_STALL_TIMEOUT_SECONDS

      stall_facts = {
        previous_status: previous_status,
        age_seconds: age.to_i,
        timeout_seconds: STEP_STALL_TIMEOUT_SECONDS
      }

      state.mark_step_completed!(
        run_id: pipeline_run_id,
        step: step_name,
        status: "failed",
        error: "step_stalled_timeout: status=#{previous_status} age_seconds=#{age.to_i}",
        result: { reason: "step_stalled_timeout" }.merge(stall_facts)
      )

      log_payload = {
        active_job_id: job_id,
        instagram_account_id: context[:account].id,
        instagram_profile_id: context[:profile].id,
        instagram_profile_post_id: context[:post].id,
        pipeline_run_id: pipeline_run_id,
        step: step_name
      }.merge(stall_facts)
      Ops::StructuredLogger.warn(event: "ai.pipeline.step_stalled", payload: log_payload)
    end
  rescue StandardError
    nil
  end
  # Computes how many seconds lie between `now` and the first timestamp that
  # parses, looking in this order: the step's "started_at", the step's
  # result "enqueued_at", the step's "created_at", then the pipeline's
  # "updated_at" and "created_at".
  #
  # @param step_row [Hash] step entry of the pipeline
  # @param pipeline [Hash] pipeline snapshot the step belongs to
  # @param now [Time] reference point
  # @return [Float, nil] age in seconds, or nil when no timestamp parses or
  #   any StandardError is raised
  def step_age_seconds(step_row:, pipeline:, now:)
    # Lambdas keep the lookups lazy: later sources are only read when the
    # earlier ones did not yield a usable time.
    timestamp_sources = [
      -> { step_row["started_at"] },
      -> { step_row.dig("result", "enqueued_at") },
      -> { step_row["created_at"] },
      -> { pipeline["updated_at"] },
      -> { pipeline["created_at"] }
    ]

    reference = nil
    timestamp_sources.each do |read_raw|
      reference = parse_time(read_raw.call)
      break if reference
    end
    return nil unless reference

    (now - reference).to_f
  rescue StandardError
    nil
  end
  # Best-effort failure bookkeeping: stores an "ai_pipeline_failure" note in
  # the post metadata, sets ai_status to "failed" and, when a pipeline state
  # is given, marks the run as finished with status "failed".
  #
  # Does nothing without a post. Any StandardError is swallowed.
  #
  # @param post [Object, nil] record responding to metadata and update!
  # @param pipeline_state [Object, nil] object responding to mark_pipeline_finished!
  # @param pipeline_run_id [Object] identifier of the failed run
  # @param reason [#to_s] failure description
  # @return [Object, nil] result of the last bookkeeping call, or nil
  def finalize_as_failed!(post:, pipeline_state:, pipeline_run_id:, reason:)
    return unless post

    current_metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
    failure_note = {
      reason: reason.to_s,
      failed_at: Time.current.iso8601(3),
      source: self.class.name
    }
    post.update!(
      metadata: current_metadata.merge("ai_pipeline_failure" => failure_note),
      ai_status: "failed",
      analyzed_at: nil
    )

    finish_details = { reason: reason.to_s, finalized_at: Time.current.iso8601(3) }
    pipeline_state&.mark_pipeline_finished!(run_id: pipeline_run_id, status: "failed", details: finish_details)
  rescue StandardError
    nil
  end
  # Coerces the input to an array of stripped, non-blank, unique strings and
  # keeps at most `limit` of them, preserving first-seen order.
  #
  # @param values [Object] array, scalar or nil
  # @param limit [Integer] maximum number of entries to keep
  # @return [Array<String>]
  def normalize_string_array(values, limit:)
    cleaned = Array(values).map { |entry| entry.to_s.strip }
    cleaned.reject(&:blank?).uniq.take(limit)
  end
  # Concatenates two array-like inputs (existing entries first) and runs the
  # combined list through normalize_string_array.
  #
  # @param existing [Object] array, scalar or nil
  # @param incoming [Object] array, scalar or nil
  # @param limit [Integer] maximum number of entries to keep
  # @return [Array<String>]
  def merge_string_array(existing, incoming, limit:)
    combined = [*Array(existing), *Array(incoming)]
    normalize_string_array(combined, limit: limit)
  end
  310. end

app/jobs/generate_llm_comment_job.rb

0.0% lines covered

100.0% branches covered

182 relevant lines. 0 lines covered and 182 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Generates an LLM comment for a story-archive InstagramProfileEvent.
  #
  # Before generating, the job asks Ai::ProfileCommentPreparationService
  # whether the profile context is ready. When it is not, the event is marked
  # skipped and, for the reason codes in PROFILE_PREPARATION_RETRY_REASON_CODES,
  # BuildInstagramProfileHistoryJob is asked to run with this job registered
  # as its resume job.
  class GenerateLlmCommentJob < ApplicationJob
    queue_as :ai

    # Preparation reason codes that qualify for the build-history fallback.
    PROFILE_PREPARATION_RETRY_REASON_CODES = %w[
      latest_posts_not_analyzed
      insufficient_analyzed_posts
      no_recent_posts_available
      missing_structured_post_signals
      profile_preparation_failed
      profile_preparation_error
    ].freeze
    # Maximum number of fallback retries per event (env-configurable, clamped to 1..10).
    PROFILE_PREPARATION_RETRY_MAX_ATTEMPTS = ENV.fetch("STORY_COMMENT_PROFILE_PREPARATION_RETRY_MAX_ATTEMPTS", 3).to_i.clamp(1, 10)

    retry_on Net::OpenTimeout, Net::ReadTimeout, wait: :polynomially_longer, attempts: 3
    retry_on Errno::ECONNREFUSED, Errno::ECONNRESET, wait: :polynomially_longer, attempts: 3

    # @param instagram_profile_event_id [Integer] id of the event to comment on
    # @param provider [#to_s] requested provider; only logged, see below
    # @param model [String, nil] model passed to the generator
    # @param requested_by [String] free-form origin label used in logs
    # @raise [StandardError] re-raises any error other than
    #   InstagramProfileEvent::LocalStoryIntelligenceUnavailableError after
    #   marking the event as failed
    def perform(instagram_profile_event_id:, provider: "local", model: nil, requested_by: "system")
      requested_provider = provider.to_s
      # Generation always runs with the "local" provider; the requested value is kept for logging.
      provider = "local"
      event = InstagramProfileEvent.find(instagram_profile_event_id)
      return unless event.story_archive_item?

      profile = event.instagram_profile
      account = profile&.instagram_account

      if event.has_llm_generated_comment?
        acknowledge_existing_comment(event: event, requested_provider: requested_provider, requested_by: requested_by)
        return
      end

      preparation = prepare_profile_context(profile: profile, account: account)
      persist_profile_preparation_snapshot(event: event, preparation: preparation)
      ensure_profile_ready!(preparation)

      event.mark_llm_comment_running!(job_id: job_id)
      result = event.generate_llm_comment!(provider: provider, model: model)

      Ops::StructuredLogger.info(
        event: "llm_comment.completed",
        payload: {
          event_id: event.id,
          instagram_profile_id: event.instagram_profile_id,
          provider: event.llm_comment_provider,
          requested_provider: requested_provider,
          model: event.llm_comment_model,
          relevance_score: event.llm_comment_relevance_score,
          requested_by: requested_by,
          source: result[:source]
        }
      )
    rescue InstagramProfileEvent::LocalStoryIntelligenceUnavailableError => e
      event&.mark_llm_comment_skipped!(message: e.message, reason: e.reason, source: e.source)
      retry_outcome = schedule_build_history_retry_if_needed(
        event: event,
        reason_code: e.reason,
        requested_provider: requested_provider,
        model: model,
        requested_by: requested_by
      )
      event&.queue_llm_comment_generation!(job_id: retry_outcome[:job_id]) if retry_outcome[:queued]

      common_fields = failure_log_fields(
        event: event, provider: provider, requested_provider: requested_provider, model: model, requested_by: requested_by
      )
      Ops::StructuredLogger.warn(
        event: "llm_comment.skipped_no_context",
        payload: common_fields.merge(
          reason: e.reason,
          source: e.source,
          error_message: e.message,
          retry_queued: ActiveModel::Type::Boolean.new.cast(retry_outcome[:queued]),
          retry_reason: retry_outcome[:reason].to_s.presence,
          retry_job_id: retry_outcome[:job_id].to_s.presence,
          retry_next_run_at: retry_outcome[:next_run_at].to_s.presence
        )
      )
    rescue StandardError => e
      event&.mark_llm_comment_failed!(error: e)

      common_fields = failure_log_fields(
        event: event, provider: provider, requested_provider: requested_provider, model: model, requested_by: requested_by
      )
      Ops::StructuredLogger.error(
        event: "llm_comment.failed",
        payload: common_fields.merge(error_class: e.class.name, error_message: e.message)
      )
      raise
    end

    private

    # Re-asserts the "completed" status on an event that already carries a
    # generated comment and logs that nothing had to be done.
    def acknowledge_existing_comment(event:, requested_provider:, requested_by:)
      event.update_columns(
        llm_comment_status: "completed",
        llm_comment_last_error: nil,
        updated_at: Time.current
      )
      Ops::StructuredLogger.info(
        event: "llm_comment.already_completed",
        payload: {
          event_id: event.id,
          instagram_profile_id: event.instagram_profile_id,
          requested_provider: requested_provider,
          requested_by: requested_by
        }
      )
    end

    # Raises LocalStoryIntelligenceUnavailableError unless the preparation
    # result flags the profile as ready. Symbol keys are consulted before
    # string keys.
    def ensure_profile_ready!(preparation)
      ready_flag = preparation[:ready_for_comment_generation] || preparation["ready_for_comment_generation"]
      return if ActiveModel::Type::Boolean.new.cast(ready_flag)

      code = preparation[:reason_code].to_s.presence ||
        preparation["reason_code"].to_s.presence ||
        "profile_comment_preparation_not_ready"
      text = preparation[:reason].to_s.presence ||
        preparation["reason"].to_s.presence ||
        "Profile context is not ready for grounded comment generation."

      raise InstagramProfileEvent::LocalStoryIntelligenceUnavailableError.new(
        text,
        reason: code,
        source: "profile_comment_preparation"
      )
    end

    # Fields shared by the "skipped" and "failed" log payloads. `event` may be
    # nil when the failure happened before the event was loaded.
    def failure_log_fields(event:, provider:, requested_provider:, model:, requested_by:)
      {
        event_id: event&.id,
        instagram_profile_id: event&.instagram_profile_id,
        provider: provider,
        requested_provider: requested_provider,
        model: model,
        requested_by: requested_by
      }
    end

    # Runs the preparation service. Never raises: a missing profile/account or
    # any StandardError is reported as a "not ready" hash instead.
    def prepare_profile_context(profile:, account:)
      unless profile && account
        return { ready_for_comment_generation: false, reason_code: "profile_missing", reason: "Profile missing for event." }
      end

      Ai::ProfileCommentPreparationService.new(account: account, profile: profile).prepare!
    rescue StandardError => e
      {
        ready_for_comment_generation: false,
        reason_code: "profile_preparation_error",
        reason: e.message.to_s,
        error_class: e.class.name
      }
    end

    # Stores the preparation result under
    # llm_comment_metadata["profile_comment_preparation"]. Best effort: errors
    # are swallowed.
    def persist_profile_preparation_snapshot(event:, preparation:)
      return unless event && preparation.is_a?(Hash)

      snapshot = event.llm_comment_metadata.is_a?(Hash) ? event.llm_comment_metadata.deep_dup : {}
      snapshot["profile_comment_preparation"] = preparation
      event.update_columns(llm_comment_metadata: snapshot, updated_at: Time.current)
    rescue StandardError
      nil
    end

    # Requests a profile-history build that resumes this job afterwards, and
    # records the attempt under llm_comment_metadata["profile_preparation_retry"].
    #
    # @return [Hash] always contains :queued and :reason; on success also
    #   :job_id, :action_log_id and :next_run_at; on an unexpected error
    #   :error_class and :error_message
    def schedule_build_history_retry_if_needed(event:, reason_code:, requested_provider:, model:, requested_by:)
      return { queued: false, reason: "event_missing" } unless event
      return { queued: false, reason: "reason_not_retryable" } unless PROFILE_PREPARATION_RETRY_REASON_CODES.include?(reason_code.to_s)

      comment_metadata = event.llm_comment_metadata.is_a?(Hash) ? event.llm_comment_metadata.deep_dup : {}
      stored_state = comment_metadata["profile_preparation_retry"]
      retry_state = stored_state.is_a?(Hash) ? stored_state.deep_dup : {}
      used_attempts = retry_state["attempts"].to_i
      return { queued: false, reason: "retry_attempts_exhausted" } if used_attempts >= PROFILE_PREPARATION_RETRY_MAX_ATTEMPTS

      profile = event.instagram_profile
      account = profile&.instagram_account
      return { queued: false, reason: "profile_missing" } unless profile && account

      history = BuildInstagramProfileHistoryJob.enqueue_with_resume_if_needed!(
        account: account,
        profile: profile,
        trigger_source: "story_comment_preparation_fallback",
        requested_by: self.class.name,
        resume_job: {
          job_class: self.class,
          job_kwargs: {
            instagram_profile_event_id: event.id,
            provider: requested_provider,
            model: model,
            requested_by: "profile_preparation_retry:#{requested_by}"
          }
        }
      )
      unless ActiveModel::Type::Boolean.new.cast(history[:accepted])
        return { queued: false, reason: history[:reason] }
      end

      retry_state["attempts"] = used_attempts + 1
      retry_state["last_reason_code"] = reason_code.to_s
      retry_state["last_skipped_at"] = Time.current.iso8601(3)
      retry_state["last_enqueued_at"] = Time.current.iso8601(3)
      retry_state["next_run_at"] = history[:next_run_at].to_s.presence
      retry_state["job_id"] = history[:job_id].to_s.presence
      if history[:action_log_id].present?
        retry_state["build_history_action_log_id"] = history[:action_log_id].to_i
      end
      retry_state["source"] = self.class.name
      retry_state["mode"] = "build_history_fallback"
      comment_metadata["profile_preparation_retry"] = retry_state
      event.update_columns(llm_comment_metadata: comment_metadata, updated_at: Time.current)

      {
        queued: true,
        reason: "build_history_fallback_registered",
        job_id: history[:job_id].to_s,
        action_log_id: history[:action_log_id],
        next_run_at: history[:next_run_at].to_s
      }
    rescue StandardError => e
      {
        queued: false,
        reason: "retry_enqueue_failed",
        error_class: e.class.name,
        error_message: e.message.to_s
      }
    end
  end

app/jobs/generate_profile_post_preview_image_job.rb

0.0% lines covered

100.0% branches covered

30 relevant lines. 0 lines covered and 30 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Attaches an Active Storage preview image to a profile post whose media is
  # a video and which has no preview image yet.
  class GenerateProfilePostPreviewImageJob < ApplicationJob
    queue_as :frame_generation

    # Rescue handlers are searched from the most recently declared one
    # backwards, so the catch-all has to be registered first. Declared the
    # other way round, the StandardError handler would also match
    # ActiveStorage::PreviewError and its dedicated policy would never apply.
    retry_on StandardError, wait: 10.seconds, attempts: 2
    retry_on ActiveStorage::PreviewError, wait: :polynomially_longer, attempts: 3

    # @param instagram_profile_post_id [Integer] id of the post to process
    # @raise [StandardError] re-raises any error after logging a warning
    def perform(instagram_profile_post_id:)
      post = InstagramProfilePost.find_by(id: instagram_profile_post_id)
      return unless post&.media&.attached?
      return if post.preview_image.attached?
      return unless post.media.blob&.content_type.to_s.start_with?("video/")

      preview = post.media.preview(resize_to_limit: [ 640, 640 ]).processed
      preview_image = preview.image
      return unless preview_image&.attached?

      newly_attached = false
      post.with_lock do
        # Re-check under the row lock. `next` leaves the block normally instead
        # of returning out of the surrounding transaction.
        next if post.preview_image.attached?

        post.preview_image.attach(preview_image.blob)
        metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
        post.update!(
          metadata: metadata.merge(
            "preview_image_status" => "attached",
            "preview_image_source" => "active_storage_preview_job",
            "preview_image_attached_at" => Time.current.utc.iso8601(3)
          )
        )
        newly_attached = true
      end
      return unless newly_attached

      Rails.logger.info("[GenerateProfilePostPreviewImageJob] attached preview_image post_id=#{post.id} blob_id=#{preview_image.blob.id}")
    rescue StandardError => e
      Rails.logger.warn("[GenerateProfilePostPreviewImageJob] failed post_id=#{instagram_profile_post_id}: #{e.class}: #{e.message}")
      raise
    end
  end

app/jobs/generate_story_preview_image_job.rb

0.0% lines covered

100.0% branches covered

22 relevant lines. 0 lines covered and 22 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Attaches an Active Storage preview image to a profile event whose media is
  # a video and which has no preview image yet.
  class GenerateStoryPreviewImageJob < ApplicationJob
    queue_as :frame_generation

    # Rescue handlers are searched from the most recently declared one
    # backwards, so the catch-all has to be registered first. Declared the
    # other way round, the StandardError handler would also match
    # ActiveStorage::PreviewError and its dedicated policy would never apply.
    retry_on StandardError, wait: 10.seconds, attempts: 2
    retry_on ActiveStorage::PreviewError, wait: :polynomially_longer, attempts: 3

    # @param instagram_profile_event_id [Integer] id of the event to process
    # @raise [StandardError] re-raises any error after logging a warning
    def perform(instagram_profile_event_id:)
      event = InstagramProfileEvent.find_by(id: instagram_profile_event_id)
      return unless event&.media&.attached?
      return if event.preview_image.attached?
      return unless event.media.blob&.content_type.to_s.start_with?("video/")

      preview = event.media.preview(resize_to_limit: [640, 640]).processed
      preview_image = preview.image
      return unless preview_image&.attached?

      newly_attached = false
      event.with_lock do
        # Re-check under the row lock. `next` leaves the block normally instead
        # of returning out of the surrounding transaction.
        next if event.preview_image.attached?

        event.preview_image.attach(preview_image.blob)
        newly_attached = true
      end
      return unless newly_attached

      Rails.logger.info("[GenerateStoryPreviewImageJob] attached preview_image event_id=#{event.id} blob_id=#{preview_image.blob.id}")
    rescue StandardError => e
      Rails.logger.warn("[GenerateStoryPreviewImageJob] failed event_id=#{instagram_profile_event_id}: #{e.class}: #{e.message}")
      raise
    end
  end

app/jobs/post_analysis_pipeline_job.rb

0.0% lines covered

100.0% branches covered

34 relevant lines. 0 lines covered and 34 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Base class for the post-analysis pipeline step jobs. It only contributes
  # private helpers; subclasses define their own perform.
  class PostAnalysisPipelineJob < ApplicationJob
    private

    # Loads the account -> profile -> post chain and the pipeline snapshot for
    # the given run.
    #
    # @return [Hash, nil] hash with :account, :profile, :post, :pipeline_state
    #   and :pipeline, or nil when a record is missing or the pipeline state
    #   has no pipeline for this run
    def load_pipeline_context!(instagram_account_id:, instagram_profile_id:, instagram_profile_post_id:, pipeline_run_id:)
      account = InstagramAccount.find(instagram_account_id)
      profile = account.instagram_profiles.find(instagram_profile_id)
      post = profile.instagram_profile_posts.find(instagram_profile_post_id)
      pipeline_state = Ai::PostAnalysisPipelineState.new(post: post)
      pipeline = pipeline_state.pipeline_for(run_id: pipeline_run_id)
      return nil unless pipeline

      {
        account: account,
        profile: profile,
        post: post,
        pipeline_state: pipeline_state,
        pipeline: pipeline
      }
    rescue ActiveRecord::RecordNotFound
      nil
    end

    # Enqueues FinalizePostAnalysisPipelineJob for the run. Best effort: any
    # StandardError raised while enqueueing is swallowed.
    #
    # @return [Object, nil] the perform_later result, or nil on error
    def enqueue_pipeline_finalizer(account:, profile:, post:, pipeline_run_id:, attempts: 0)
      FinalizePostAnalysisPipelineJob.perform_later(
        instagram_account_id: account.id,
        instagram_profile_id: profile.id,
        instagram_profile_post_id: post.id,
        pipeline_run_id: pipeline_run_id,
        attempts: attempts
      )
    rescue StandardError
      nil
    end

    # Formats an error as "Class: message", capped at 320 bytes.
    #
    # byteslice counts bytes, so the cut can land inside a multibyte
    # character; scrub drops such a dangling fragment so the result keeps a
    # valid encoding.
    #
    # @param error [Exception]
    # @return [String]
    def format_error(error)
      "#{error.class}: #{error.message}".byteslice(0, 320).scrub("")
    end
  end

app/jobs/post_instagram_profile_comment_job.rb

0.0% lines covered

100.0% branches covered

46 relevant lines. 0 lines covered and 46 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Posts a comment on a profile post through Instagram::Client, records a
  # "post_comment_sent" profile event, updates the optional action log and
  # broadcasts a notification to the account stream.
  class PostInstagramProfileCommentJob < ApplicationJob
    queue_as :messages

    # @param instagram_account_id [Integer]
    # @param instagram_profile_id [Integer]
    # @param instagram_profile_post_id [Integer]
    # @param comment_text [#to_s] text to post
    # @param media_id [#to_s] media identifier passed to the client
    # @param profile_action_log_id [Integer, nil] optional action log to update
    # @raise [StandardError] re-raises any error after marking the action log
    #   as failed and broadcasting an alert
    def perform(instagram_account_id:, instagram_profile_id:, instagram_profile_post_id:, comment_text:, media_id:, profile_action_log_id: nil)
      account = InstagramAccount.find(instagram_account_id)
      profile = account.instagram_profiles.find(instagram_profile_id)
      post = profile.instagram_profile_posts.find(instagram_profile_post_id)
      action_log = profile.instagram_profile_action_logs.find_by(id: profile_action_log_id)
      action_log&.mark_running!(extra_metadata: { queue_name: queue_name, active_job_id: job_id })

      client = Instagram::Client.new(account: account)
      api_result = client.post_comment_to_media!(
        media_id: media_id.to_s,
        shortcode: post.shortcode.to_s,
        comment_text: comment_text.to_s
      )

      event_metadata = {
        source: "profile_post_suggestion_modal",
        post_shortcode: post.shortcode,
        media_id: media_id.to_s,
        comment_text: comment_text.to_s,
        api_result: api_result
      }
      profile.record_event!(
        kind: "post_comment_sent",
        external_id: "post_comment_sent:#{media_id}:#{Time.current.utc.iso8601(6)}",
        occurred_at: Time.current,
        metadata: event_metadata
      )

      action_log&.mark_succeeded!(
        extra_metadata: { post_shortcode: post.shortcode, media_id: media_id.to_s },
        log_text: "Comment posted on #{post.shortcode}"
      )
      broadcast_notification(account, kind: "notice", message: "Comment posted on #{post.shortcode}.")
    rescue StandardError => e
      action_log&.mark_failed!(error_message: e.message, extra_metadata: { active_job_id: job_id, media_id: media_id.to_s })
      # The account is nil when the failure happened while loading it.
      broadcast_notification(account, kind: "alert", message: "Comment post failed: #{e.message}") if account
      raise
    end

    private

    # Appends a shared/notification partial to the account's "notifications" target.
    def broadcast_notification(account, kind:, message:)
      Turbo::StreamsChannel.broadcast_append_to(
        account,
        target: "notifications",
        partial: "shared/notification",
        locals: { kind: kind, message: message }
      )
    end
  end

app/jobs/process_instagram_account_continuously_job.rb

0.0% lines covered

100.0% branches covered

146 relevant lines. 0 lines covered and 146 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Runs one continuous-processing pass for an Instagram account via
  # Pipeline::AccountProcessingCoordinator, tracked by a "continuous_processing"
  # sync run. A state/heartbeat pair on the account guards against overlapping
  # passes, and failures push back the next allowed run.
  class ProcessInstagramAccountContinuouslyJob < ApplicationJob
    queue_as :sync

    # A "running" state whose heartbeat is older than this may be taken over.
    RUNNING_STALE_AFTER = 15.minutes

    retry_on Net::OpenTimeout, Net::ReadTimeout, wait: :polynomially_longer, attempts: 4
    retry_on Errno::ECONNREFUSED, Errno::ECONNRESET, wait: :polynomially_longer, attempts: 4

    # @param instagram_account_id [Integer] account to process
    # @param trigger_source [String] origin label stored in stats and logs
    # @raise [StandardError] re-raises any error after recording the failure
    def perform(instagram_account_id:, trigger_source: "scheduler")
      account = InstagramAccount.find(instagram_account_id)
      return unless account.continuous_processing_enabled?

      if retry_backoff_active?(account)
        Ops::StructuredLogger.info(
          event: "continuous_processing.skipped_retry_backoff",
          payload: {
            account_id: account.id,
            retry_after_at: account.continuous_processing_retry_after_at&.iso8601,
            trigger_source: trigger_source
          }
        )
        return
      end

      return unless claim_processing_lock!(account: account, trigger_source: trigger_source)

      run = account.sync_runs.create!(
        kind: "continuous_processing",
        status: "running",
        started_at: Time.current,
        stats: { trigger_source: trigger_source, pipeline_version: "continuous_processing_v1" }
      )

      coordinator = Pipeline::AccountProcessingCoordinator.new(account: account, trigger_source: trigger_source)
      stats = coordinator.run!

      run.update!(
        status: "succeeded",
        finished_at: Time.current,
        stats: (run.stats || {}).merge(stats).merge(status: "succeeded")
      )
      # Success resets the failure bookkeeping and releases the "running" state.
      account.update!(
        continuous_processing_state: "idle",
        continuous_processing_last_finished_at: Time.current,
        continuous_processing_last_heartbeat_at: Time.current,
        continuous_processing_last_error: nil,
        continuous_processing_failure_count: 0,
        continuous_processing_retry_after_at: nil
      )

      Ops::StructuredLogger.info(
        event: "continuous_processing.completed",
        payload: {
          account_id: account.id,
          sync_run_id: run.id,
          trigger_source: trigger_source,
          enqueued_jobs: Array(stats[:enqueued_jobs]).size,
          skipped_jobs: Array(stats[:skipped_jobs]).size
        }
      )
    rescue StandardError => e
      handle_failure!(
        account: account,
        run: run,
        error: e,
        trigger_source: trigger_source,
        instagram_account_id: instagram_account_id
      )
      raise
    end

    private

    # True while the account's retry-after timestamp lies in the future.
    def retry_backoff_active?(account)
      retry_after = account.continuous_processing_retry_after_at
      retry_after.present? && retry_after > Time.current
    end

    # Switches the account to "running" under a row lock unless another pass
    # is running with a fresh heartbeat.
    #
    # @return [Boolean] whether this job now owns the processing slot
    def claim_processing_lock!(account:, trigger_source:)
      claimed = false

      account.with_lock do
        heartbeat = account.continuous_processing_last_heartbeat_at
        heartbeat_stale = heartbeat.blank? || heartbeat < RUNNING_STALE_AFTER.ago

        if account.continuous_processing_state == "running" && !heartbeat_stale
          Ops::StructuredLogger.info(
            event: "continuous_processing.skipped_already_running",
            payload: {
              account_id: account.id,
              trigger_source: trigger_source,
              last_heartbeat_at: heartbeat&.iso8601
            }
          )
          next
        end

        account.update!(
          continuous_processing_state: "running",
          continuous_processing_last_started_at: Time.current,
          continuous_processing_last_heartbeat_at: Time.current,
          continuous_processing_last_error: nil
        )
        claimed = true
      end

      claimed
    end

    # Records a failed pass: bumps the failure counter, schedules the next
    # allowed run, marks the sync run (if one was created) as failed and logs.
    # Does nothing when the account cannot be found.
    def handle_failure!(account:, run:, error:, trigger_source:, instagram_account_id:)
      account ||= InstagramAccount.find_by(id: instagram_account_id)
      return unless account

      account.with_lock do
        failure_total = account.continuous_processing_failure_count.to_i + 1
        next_allowed_at = Time.current + failure_backoff_for(failure_total)
        account.update!(
          continuous_processing_state: "idle",
          continuous_processing_last_finished_at: Time.current,
          continuous_processing_last_heartbeat_at: Time.current,
          continuous_processing_last_error: "#{error.class}: #{error.message}",
          continuous_processing_failure_count: failure_total,
          continuous_processing_retry_after_at: next_allowed_at
        )
      end

      if run
        failure_stats = { status: "failed", error_class: error.class.name, error_message: error.message }
        run.update!(
          status: "failed",
          finished_at: Time.current,
          error_message: error.message,
          stats: (run.stats || {}).merge(failure_stats)
        )
      end

      Ops::StructuredLogger.error(
        event: "continuous_processing.failed",
        payload: {
          account_id: account.id,
          sync_run_id: run&.id,
          trigger_source: trigger_source,
          error_class: error.class.name,
          error_message: error.message,
          retry_after_at: account.continuous_processing_retry_after_at&.iso8601,
          failure_count: account.continuous_processing_failure_count
        }
      )
    end

    # Backoff before the next pass: 5 min, 15 min, 30 min and 1 h for the
    # first four consecutive failures, 3 h otherwise, plus 0-90 s of jitter.
    def failure_backoff_for(failure_count)
      tiers = { 1 => 5.minutes, 2 => 15.minutes, 3 => 30.minutes, 4 => 1.hour }
      tiers.fetch(failure_count, 3.hours) + rand(0..90).seconds
    end
  end

app/jobs/process_post_face_analysis_job.rb

0.0% lines covered

100.0% branches covered

77 relevant lines. 0 lines covered and 77 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "timeout"
  # Pipeline step job for the "face" step: runs PostFaceRecognitionService on
  # the post under a timeout and records the outcome in the pipeline state.
  class ProcessPostFaceAnalysisJob < PostAnalysisPipelineJob
    queue_as :ai_face_queue

    retry_on Net::OpenTimeout, Net::ReadTimeout, wait: :polynomially_longer, attempts: 3
    retry_on Errno::ECONNRESET, Errno::ECONNREFUSED, wait: :polynomially_longer, attempts: 3
    retry_on Timeout::Error, wait: :polynomially_longer, attempts: 2

    # Does nothing when the pipeline context cannot be loaded. When the run or
    # the "face" step is already terminal, the step is skipped and no
    # finalizer is enqueued; in every other case a finalizer is enqueued on
    # the way out, whether the step succeeded or failed.
    #
    # @raise [StandardError] re-raises any error after marking the step failed
    def perform(instagram_account_id:, instagram_profile_id:, instagram_profile_post_id:, pipeline_run_id:)
      enqueue_finalizer = true
      context = load_pipeline_context!(
        instagram_account_id: instagram_account_id,
        instagram_profile_id: instagram_profile_id,
        instagram_profile_post_id: instagram_profile_post_id,
        pipeline_run_id: pipeline_run_id
      )
      return unless context

      state = context[:pipeline_state]
      post = context[:post]

      already_done = state.pipeline_terminal?(run_id: pipeline_run_id) ||
        state.step_terminal?(run_id: pipeline_run_id, step: "face")
      if already_done
        enqueue_finalizer = false
        log_skipped_terminal(context: context, post: post, pipeline_run_id: pipeline_run_id)
        return
      end

      state.mark_step_running!(
        run_id: pipeline_run_id,
        step: "face",
        queue_name: queue_name,
        active_job_id: job_id
      )

      recognition = Timeout.timeout(face_timeout_seconds) do
        PostFaceRecognitionService.new.process!(post: post)
      end

      step_summary = {
        skipped: ActiveModel::Type::Boolean.new.cast(recognition[:skipped]),
        face_count: recognition[:face_count].to_i,
        reason: recognition[:reason].to_s,
        matched_people_count: Array(recognition[:matched_people]).length
      }
      state.mark_step_completed!(
        run_id: pipeline_run_id,
        step: "face",
        status: "succeeded",
        result: step_summary
      )
    rescue StandardError => e
      context&.dig(:pipeline_state)&.mark_step_completed!(
        run_id: pipeline_run_id,
        step: "face",
        status: "failed",
        error: format_error(e),
        result: { reason: "face_analysis_failed" }
      )
      raise
    ensure
      if context && enqueue_finalizer
        enqueue_pipeline_finalizer(
          account: context[:account],
          profile: context[:profile],
          post: context[:post],
          pipeline_run_id: pipeline_run_id
        )
      end
    end

    private

    # Logs that the step was skipped because it (or the whole run) is terminal.
    def log_skipped_terminal(context:, post:, pipeline_run_id:)
      Ops::StructuredLogger.info(
        event: "ai.face_analysis.skipped_terminal",
        payload: {
          active_job_id: job_id,
          instagram_account_id: context[:account].id,
          instagram_profile_id: context[:profile].id,
          instagram_profile_post_id: post.id,
          pipeline_run_id: pipeline_run_id
        }
      )
    end

    # Timeout for the recognition call: AI_FACE_TIMEOUT_SECONDS (default 180),
    # clamped to 20..420.
    def face_timeout_seconds
      ENV.fetch("AI_FACE_TIMEOUT_SECONDS", 180).to_i.clamp(20, 420)
    end
  end

app/jobs/process_post_metadata_tagging_job.rb

0.0% lines covered

100.0% branches covered

231 relevant lines. 0 lines covered and 231 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. class ProcessPostMetadataTaggingJob < PostAnalysisPipelineJob
  2. queue_as :ai_metadata_queue
  3. PROFILE_INCOMPLETE_REASON_CODES = %w[
  4. latest_posts_not_analyzed
  5. insufficient_analyzed_posts
  6. no_recent_posts_available
  7. missing_structured_post_signals
  8. profile_preparation_failed
  9. profile_preparation_error
  10. ].freeze
  11. COMMENT_RETRY_MAX_ATTEMPTS = ENV.fetch("POST_COMMENT_RETRY_MAX_ATTEMPTS", 3).to_i.clamp(1, 10)
  # Runs the "metadata" step of the post analysis pipeline for one post.
  #
  # Summarises the stored face-recognition metadata into
  # analysis["face_summary"], passes the analysis to
  # Ai::ProfileAutoTagger.sync_from_post_analysis!, runs comment generation and
  # the build-history retry registration when their task flags allow it, and
  # records the step outcome on the pipeline state.
  #
  # The four keyword arguments are forwarded unchanged to
  # load_pipeline_context!; pipeline_run_id also identifies the run on every
  # pipeline-state call.
  #
  # Raises: any StandardError is recorded as a failed "metadata" step and then
  # re-raised. The ensure block enqueues the pipeline finalizer unless the
  # context failed to load or the step was skipped as already terminal.
  def perform(instagram_account_id:, instagram_profile_id:, instagram_profile_post_id:, pipeline_run_id:)
    enqueue_finalizer = true
    context = load_pipeline_context!(
      instagram_account_id: instagram_account_id,
      instagram_profile_id: instagram_profile_id,
      instagram_profile_post_id: instagram_profile_post_id,
      pipeline_run_id: pipeline_run_id
    )
    return unless context

    account = context[:account]
    post = context[:post]
    profile = context[:profile]
    pipeline_state = context[:pipeline_state]

    if pipeline_state.pipeline_terminal?(run_id: pipeline_run_id) || pipeline_state.step_terminal?(run_id: pipeline_run_id, step: "metadata")
      # Nothing left to do for this run/step, so do not wake the finalizer again.
      enqueue_finalizer = false
      Ops::StructuredLogger.info(
        event: "ai.metadata_tagging.skipped_terminal",
        payload: {
          active_job_id: job_id,
          instagram_account_id: account.id,
          instagram_profile_id: profile.id,
          instagram_profile_post_id: post.id,
          pipeline_run_id: pipeline_run_id
        }
      )
      return
    end

    pipeline_state.mark_step_running!(
      run_id: pipeline_run_id,
      step: "metadata",
      queue_name: queue_name,
      active_job_id: job_id
    )

    boolean = ActiveModel::Type::Boolean.new
    analysis = nil
    face_meta = nil

    # Merge under a row lock on freshly reloaded data, the same way the OCR and
    # video steps persist their results, so keys written by another step
    # between context load and this write are not overwritten by a stale copy.
    post.with_lock do
      post.reload
      analysis = post.analysis.is_a?(Hash) ? post.analysis.deep_dup : {}
      face_meta = post.metadata.is_a?(Hash) ? post.metadata.dig("face_recognition") : nil
      face_meta = {} unless face_meta.is_a?(Hash)
      # Ignore malformed rows: indexing a non-Hash row with a symbol would raise.
      matched_people = Array(face_meta["matched_people"]).select { |row| row.is_a?(Hash) }

      analysis["face_summary"] = {
        "face_count" => face_meta["face_count"].to_i,
        "owner_faces_count" => matched_people.count { |row| boolean.cast(row["owner_match"] || row[:owner_match]) },
        "recurring_faces_count" => matched_people.count { |row| boolean.cast(row["recurring_face"] || row[:recurring_face]) },
        "detection_source" => face_meta["detection_source"].to_s.presence,
        "participant_summary" => face_meta["participant_summary"].to_s.presence,
        "detection_reason" => face_meta["detection_reason"].to_s.presence,
        "detection_error" => face_meta["detection_error"].to_s.presence
      }.compact
      post.update!(analysis: analysis)
    end

    Ai::ProfileAutoTagger.sync_from_post_analysis!(profile: profile, analysis: analysis)

    comment_result =
      if comment_generation_enabled?(pipeline_state: pipeline_state, pipeline_run_id: pipeline_run_id)
        Ai::PostCommentGenerationService.new(
          account: account,
          profile: profile,
          post: post,
          enforce_required_evidence: comment_evidence_policy_enforced?(pipeline_state: pipeline_state, pipeline_run_id: pipeline_run_id)
        ).run!
      else
        # Stand-in result so the step summary below has the same shape either way.
        {
          blocked: true,
          status: "disabled_by_task_flags",
          source: "policy",
          suggestions_count: 0,
          reason_code: "comments_disabled"
        }
      end

    retry_result =
      if comment_retry_enabled?(pipeline_state: pipeline_state, pipeline_run_id: pipeline_run_id)
        enqueue_comment_retry_if_needed!(
          account: account,
          profile: profile,
          post: post,
          comment_result: comment_result
        )
      else
        { queued: false, reason: "retry_disabled" }
      end

    pipeline_state.mark_step_completed!(
      run_id: pipeline_run_id,
      step: "metadata",
      status: "succeeded",
      result: {
        face_count: face_meta["face_count"].to_i,
        participant_summary_present: face_meta["participant_summary"].to_s.present?,
        comment_generation_status: comment_result[:status].to_s,
        comment_generation_blocked: boolean.cast(comment_result[:blocked]),
        comment_generation_source: comment_result[:source].to_s,
        comment_suggestions_count: comment_result[:suggestions_count].to_i,
        comment_reason_code: comment_result[:reason_code].to_s.presence,
        comment_history_reason_code: comment_result[:history_reason_code].to_s.presence,
        comment_retry_queued: boolean.cast(retry_result[:queued]),
        comment_retry_reason: retry_result[:reason].to_s.presence,
        comment_retry_job_id: retry_result[:job_id].to_s.presence,
        comment_retry_next_run_at: retry_result[:next_run_at].to_s.presence
      }
    )
  rescue StandardError => e
    # context is nil when load_pipeline_context! itself raised; hence the safe navigation.
    context&.dig(:pipeline_state)&.mark_step_completed!(
      run_id: pipeline_run_id,
      step: "metadata",
      status: "failed",
      error: format_error(e),
      result: {
        reason: "metadata_tagging_failed"
      }
    )
    raise
  ensure
    if context && enqueue_finalizer
      enqueue_pipeline_finalizer(
        account: context[:account],
        profile: context[:profile],
        post: context[:post],
        pipeline_run_id: pipeline_run_id
      )
    end
  end
  128. private
  # Whether comment suggestions should be generated for this pipeline run.
  # Reads the "generate_comments" task flag; true when the flag is absent.
  def comment_generation_enabled?(pipeline_state:, pipeline_run_id:)
    pipeline_task_flag_enabled?("generate_comments", pipeline_state: pipeline_state, pipeline_run_id: pipeline_run_id)
  end

  # Whether comment generation must enforce the required-evidence policy.
  # Reads the "enforce_comment_evidence_policy" task flag; true when absent.
  def comment_evidence_policy_enforced?(pipeline_state:, pipeline_run_id:)
    pipeline_task_flag_enabled?("enforce_comment_evidence_policy", pipeline_state: pipeline_state, pipeline_run_id: pipeline_run_id)
  end

  # Whether a blocked comment may register a build-history retry.
  # Reads the "retry_on_incomplete_profile" task flag; true when absent.
  def comment_retry_enabled?(pipeline_state:, pipeline_run_id:)
    pipeline_task_flag_enabled?("retry_on_incomplete_profile", pipeline_state: pipeline_state, pipeline_run_id: pipeline_run_id)
  end

  # Shared lookup behind the three flag predicates above.
  #
  # Fetches the pipeline hash for the run, reads its "task_flags" entry and
  # casts the named flag to a boolean. Fails open: returns true when the
  # pipeline or flags are not hashes, when the flag is not set, or when
  # anything raises while reading it.
  def pipeline_task_flag_enabled?(flag_name, pipeline_state:, pipeline_run_id:)
    pipeline = pipeline_state.pipeline_for(run_id: pipeline_run_id)
    flags = pipeline.is_a?(Hash) ? pipeline["task_flags"] : {}
    flags = {} unless flags.is_a?(Hash)
    return true unless flags.key?(flag_name)

    ActiveModel::Type::Boolean.new.cast(flags[flag_name])
  rescue StandardError
    true
  end
  # Registers a build-history fallback when comment generation was blocked
  # because required evidence is missing and the profile history is incomplete.
  #
  # Walks a series of guards (comment blocked, retryable reason code, policy
  # present, history not ready, retryable history reason, attempts left), then
  # asks BuildInstagramProfileHistoryJob to enqueue with a resume job and
  # stores the retry bookkeeping under metadata["comment_generation_policy"].
  #
  # Returns a hash with :queued and :reason (plus job details when queued).
  # Never raises: any StandardError becomes a "retry_enqueue_failed" result.
  def enqueue_comment_retry_if_needed!(account:, profile:, post:, comment_result:)
    caster = ActiveModel::Type::Boolean.new
    return { queued: false, reason: "comment_not_blocked" } unless caster.cast(comment_result[:blocked])
    unless comment_result[:reason_code].to_s == "missing_required_evidence"
      return { queued: false, reason: "reason_not_retryable" }
    end

    post_metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
    policy = post_metadata["comment_generation_policy"]
    return { queued: false, reason: "policy_missing" } unless policy.is_a?(Hash)
    return { queued: false, reason: "history_ready" } if caster.cast(policy["history_ready"])

    blocking_reason = policy["history_reason_code"].to_s
    unless PROFILE_INCOMPLETE_REASON_CODES.include?(blocking_reason)
      return { queued: false, reason: "history_reason_not_retryable" }
    end

    stored_state = policy["retry_state"]
    retry_state = stored_state.is_a?(Hash) ? stored_state.deep_dup : {}
    attempts_so_far = retry_state["attempts"].to_i
    if attempts_so_far >= COMMENT_RETRY_MAX_ATTEMPTS
      return { queued: false, reason: "retry_attempts_exhausted" }
    end

    # The resumed analysis only re-runs metadata and comment generation.
    resume_flags = {
      analyze_visual: false,
      analyze_faces: false,
      run_ocr: false,
      run_video: false,
      run_metadata: true,
      generate_comments: true,
      enforce_comment_evidence_policy: true,
      retry_on_incomplete_profile: true
    }
    enqueue_outcome = BuildInstagramProfileHistoryJob.enqueue_with_resume_if_needed!(
      account: account,
      profile: profile,
      trigger_source: "post_metadata_comment_fallback",
      requested_by: self.class.name,
      resume_job: {
        job_class: AnalyzeInstagramProfilePostJob,
        job_kwargs: {
          instagram_account_id: account.id,
          instagram_profile_id: profile.id,
          instagram_profile_post_id: post.id,
          pipeline_mode: "inline",
          task_flags: resume_flags
        }
      }
    )
    unless caster.cast(enqueue_outcome[:accepted])
      return { queued: false, reason: enqueue_outcome[:reason] }
    end

    queued_job_id = enqueue_outcome[:job_id].to_s
    scheduled_for = enqueue_outcome[:next_run_at].to_s
    action_log_id = enqueue_outcome[:action_log_id]

    retry_state.merge!(
      "attempts" => attempts_so_far + 1,
      "last_reason_code" => blocking_reason,
      "last_blocked_at" => Time.current.iso8601(3),
      "last_enqueued_at" => Time.current.iso8601(3),
      "next_run_at" => scheduled_for.presence,
      "job_id" => queued_job_id.presence
    )
    retry_state["build_history_action_log_id"] = action_log_id.to_i if action_log_id.present?
    retry_state.merge!(
      "source" => self.class.name,
      "mode" => "build_history_fallback"
    )

    policy["retry_state"] = retry_state
    policy["updated_at"] = Time.current.iso8601(3)
    post_metadata["comment_generation_policy"] = policy
    post.update!(metadata: post_metadata)

    {
      queued: true,
      reason: "build_history_fallback_registered",
      job_id: queued_job_id,
      action_log_id: action_log_id,
      next_run_at: scheduled_for
    }
  rescue StandardError => e
    {
      queued: false,
      reason: "retry_enqueue_failed",
      error_class: e.class.name,
      error_message: e.message.to_s
    }
  end
  231. end

app/jobs/process_post_ocr_analysis_job.rb

0.0% lines covered

100.0% branches covered

183 relevant lines. 0 lines covered and 183 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "timeout"
  2. class ProcessPostOcrAnalysisJob < PostAnalysisPipelineJob
  3. queue_as :ai_ocr_queue
  4. MAX_DEFER_ATTEMPTS = ENV.fetch("AI_OCR_MAX_DEFER_ATTEMPTS", 4).to_i.clamp(1, 12)
  5. retry_on Net::OpenTimeout, Net::ReadTimeout, wait: :polynomially_longer, attempts: 3
  6. retry_on Errno::ECONNRESET, Errno::ECONNREFUSED, wait: :polynomially_longer, attempts: 3
  7. retry_on Timeout::Error, wait: :polynomially_longer, attempts: 2
  # Runs the "ocr" step of the post analysis pipeline for one post.
  #
  # Loads the pipeline context, skips when the run or the step is already
  # terminal, defers when the resource guard disallows the task, then either
  # reuses OCR data found under the post's face-recognition metadata or calls
  # Ai::PostOcrService inside a timeout. The result is persisted and the step
  # outcome recorded on the pipeline state.
  #
  # defer_attempt counts how many times this job has been re-enqueued by
  # resource_available?.
  #
  # Raises: any StandardError is recorded as a failed "ocr" step and re-raised.
  8. def perform(instagram_account_id:, instagram_profile_id:, instagram_profile_post_id:, pipeline_run_id:, defer_attempt: 0)
  # Cleared only on the terminal-skip path below; the ensure block checks it.
  9. enqueue_finalizer = true
  10. context = load_pipeline_context!(
  11. instagram_account_id: instagram_account_id,
  12. instagram_profile_id: instagram_profile_id,
  13. instagram_profile_post_id: instagram_profile_post_id,
  14. pipeline_run_id: pipeline_run_id
  15. )
  16. return unless context
  17. account = context[:account]
  18. post = context[:post]
  19. pipeline_state = context[:pipeline_state]
  20. if pipeline_state.pipeline_terminal?(run_id: pipeline_run_id) || pipeline_state.step_terminal?(run_id: pipeline_run_id, step: "ocr")
  21. enqueue_finalizer = false
  22. Ops::StructuredLogger.info(
  23. event: "ai.ocr_analysis.skipped_terminal",
  24. payload: {
  25. active_job_id: job_id,
  26. instagram_account_id: account.id,
  27. instagram_profile_id: context[:profile].id,
  28. instagram_profile_post_id: post.id,
  29. pipeline_run_id: pipeline_run_id
  30. }
  31. )
  32. return
  33. end
  # resource_available? has already recorded the deferral or exhaustion on the
  # pipeline state (and re-enqueued this job when deferring). enqueue_finalizer
  # is still true here, so the ensure block enqueues the finalizer on this path.
  34. unless resource_available?(defer_attempt: defer_attempt, context: context, pipeline_run_id: pipeline_run_id)
  35. return
  36. end
  37. pipeline_state.mark_step_running!(
  38. run_id: pipeline_run_id,
  39. step: "ocr",
  40. queue_name: queue_name,
  41. active_job_id: job_id
  42. )
  # Prefer OCR text/blocks already stored with the face-recognition metadata;
  # reuse_ocr_from_face_metadata returns nil when there is nothing to reuse.
  43. reused = reuse_ocr_from_face_metadata(post: post)
  44. result =
  45. if reused
  46. reused
  47. else
  48. context_builder = Ai::PostAnalysisContextBuilder.new(profile: context[:profile], post: post)
  49. image_payload = context_builder.detection_image_payload
  # The payload is flagged as skipped: build a skipped result locally instead of
  # calling the OCR service.
  50. if ActiveModel::Type::Boolean.new.cast(image_payload[:skipped])
  51. {
  52. skipped: true,
  53. ocr_text: nil,
  54. ocr_blocks: [],
  55. metadata: {
  56. source: "post_ocr_service",
  57. reason: image_payload[:reason].to_s.presence || "image_payload_unavailable"
  58. }
  59. }
  60. else
  61. Timeout.timeout(ocr_timeout_seconds) do
  62. Ai::PostOcrService.new.extract_from_image_bytes(
  63. image_bytes: image_payload[:image_bytes],
  64. usage_context: {
  65. workflow: "post_analysis_pipeline",
  66. task: "ocr",
  67. post_id: post.id,
  68. instagram_account_id: account.id
  69. }
  70. )
  71. end
  72. end
  73. end
  74. persist_ocr_result!(post: post, result: result)
  75. pipeline_state.mark_step_completed!(
  76. run_id: pipeline_run_id,
  77. step: "ocr",
  78. status: "succeeded",
  79. result: {
  80. skipped: ActiveModel::Type::Boolean.new.cast(result[:skipped]),
  81. text_present: result[:ocr_text].to_s.present?,
  82. ocr_blocks_count: Array(result[:ocr_blocks]).length,
  # The source is looked up under symbol keys first, then string keys.
  83. source: result.dig(:metadata, :source) || result.dig("metadata", "source")
  84. }.compact
  85. )
  86. rescue StandardError => e
  # context is nil when load_pipeline_context! itself raised; hence the safe navigation.
  87. context&.dig(:pipeline_state)&.mark_step_completed!(
  88. run_id: pipeline_run_id,
  89. step: "ocr",
  90. status: "failed",
  91. error: format_error(e),
  92. result: {
  93. reason: "ocr_analysis_failed"
  94. }
  95. )
  96. raise
  97. ensure
  # Runs on success, failure and deferral; skipped only when no context was
  # loaded or the step was already terminal.
  98. if context && enqueue_finalizer
  99. enqueue_pipeline_finalizer(
  100. account: context[:account],
  101. profile: context[:profile],
  102. post: context[:post],
  103. pipeline_run_id: pipeline_run_id
  104. )
  105. end
  106. end
  107. private
  # Consults Ops::ResourceGuard before the OCR step runs.
  #
  # Returns true when the guard allows the task. Otherwise returns false after
  # either failing the step (defer_attempt has reached MAX_DEFER_ATTEMPTS) or
  # marking it queued and re-enqueueing this job with defer_attempt + 1 after
  # the guard's suggested delay (20 seconds when none is given).
  def resource_available?(defer_attempt:, context:, pipeline_run_id:)
    guard = Ops::ResourceGuard.allow_ai_task?(task: "ocr", queue_name: queue_name, critical: false)
    return true if ActiveModel::Type::Boolean.new.cast(guard[:allow])

    attempt = defer_attempt.to_i
    state = context[:pipeline_state]

    if attempt >= MAX_DEFER_ATTEMPTS
      state.mark_step_completed!(
        run_id: pipeline_run_id,
        step: "ocr",
        status: "failed",
        error: "resource_guard_exhausted: #{guard[:reason]}",
        result: {
          reason: "resource_constraints",
          snapshot: guard[:snapshot]
        }
      )
      return false
    end

    delay = guard[:retry_in_seconds].to_i
    delay = 20 unless delay.positive?

    state.mark_step_queued!(
      run_id: pipeline_run_id,
      step: "ocr",
      queue_name: queue_name,
      active_job_id: job_id,
      result: {
        reason: "resource_constrained",
        defer_attempt: attempt,
        retry_in_seconds: delay,
        snapshot: guard[:snapshot]
      }
    )

    requeue_args = {
      instagram_account_id: context[:account].id,
      instagram_profile_id: context[:profile].id,
      instagram_profile_post_id: context[:post].id,
      pipeline_run_id: pipeline_run_id,
      defer_attempt: attempt + 1
    }
    self.class.set(wait: delay.seconds).perform_later(**requeue_args)
    false
  end
  # Builds an OCR result from text/blocks stored under
  # post.metadata["face_recognition"], so the OCR service need not be called.
  #
  # Returns nil when that entry is missing, is not a Hash, or holds neither
  # text nor Hash-shaped blocks. Otherwise returns a result hash whose
  # metadata source is "face_recognition_cache", with at most 80 blocks.
  def reuse_ocr_from_face_metadata(post:)
    post_metadata = post.metadata
    cached = post_metadata.is_a?(Hash) ? post_metadata.dig("face_recognition") : nil
    return nil unless cached.is_a?(Hash)

    cached_text = cached["ocr_text"].to_s.strip
    cached_blocks = Array(cached["ocr_blocks"]).grep(Hash)
    return nil if cached_text.blank? && cached_blocks.empty?

    {
      skipped: false,
      ocr_text: cached_text.presence,
      ocr_blocks: cached_blocks.first(80),
      metadata: { source: "face_recognition_cache" }
    }
  end
  # Writes an OCR result onto the post while holding a row lock.
  #
  # The post is reloaded inside the lock so the merge starts from current
  # data. analysis receives "ocr_text" and up to 40 "ocr_blocks" when the
  # result carries those symbol keys; metadata["ocr_analysis"] is replaced
  # with a compacted summary (up to 80 blocks) stamped with the current time.
  def persist_ocr_result!(post:, result:)
    post.with_lock do
      post.reload
      analysis = post.analysis.is_a?(Hash) ? post.analysis.deep_dup : {}
      metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}

      text = result[:ocr_text]
      blocks = Array(result[:ocr_blocks])
      analysis["ocr_text"] = text if result.key?(:ocr_text)
      analysis["ocr_blocks"] = blocks.first(40) if result.key?(:ocr_blocks)

      # Each provenance field is read under symbol keys first, then string keys.
      provenance = %w[source reason error_message].to_h do |field|
        [field, result.dig(:metadata, field.to_sym) || result.dig("metadata", field)]
      end

      summary = { "ocr_text" => text.to_s.presence, "ocr_blocks" => blocks.first(80) }
      summary = summary.merge(provenance)
      summary["updated_at"] = Time.current.iso8601(3)
      metadata["ocr_analysis"] = summary.compact

      post.update!(analysis: analysis, metadata: metadata)
    end
  end
  # Timeout budget, in seconds, for one OCR service call.
  # Read from AI_OCR_TIMEOUT_SECONDS (default 150) and clamped to 15..360.
  def ocr_timeout_seconds
    configured = ENV.fetch("AI_OCR_TIMEOUT_SECONDS", 150).to_i
    configured.clamp(15, 360)
  end
  183. end

app/jobs/process_post_video_analysis_job.rb

0.0% lines covered

100.0% branches covered

229 relevant lines. 0 lines covered and 229 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "timeout"
  2. class ProcessPostVideoAnalysisJob < PostAnalysisPipelineJob
  3. queue_as :video_processing_queue
  4. MAX_DEFER_ATTEMPTS = ENV.fetch("AI_VIDEO_MAX_DEFER_ATTEMPTS", 4).to_i.clamp(1, 12)
  5. retry_on Timeout::Error, wait: :polynomially_longer, attempts: 2
  # Runs the "video" step of the post analysis pipeline for one post.
  #
  # Loads the pipeline context, skips when the run or the step is already
  # terminal, defers when the resource guard disallows the task, then builds
  # the video payload. A payload flagged as skipped is persisted and recorded
  # as a succeeded step; otherwise PostVideoContextExtractionService runs
  # inside a timeout and its result is persisted and summarised on the step.
  #
  # defer_attempt counts how many times this job has been re-enqueued by
  # resource_available?.
  #
  # Raises: any StandardError is recorded as a failed "video" step and re-raised.
  6. def perform(instagram_account_id:, instagram_profile_id:, instagram_profile_post_id:, pipeline_run_id:, defer_attempt: 0)
  # Cleared only on the terminal-skip path below; the ensure block checks it.
  7. enqueue_finalizer = true
  8. context = load_pipeline_context!(
  9. instagram_account_id: instagram_account_id,
  10. instagram_profile_id: instagram_profile_id,
  11. instagram_profile_post_id: instagram_profile_post_id,
  12. pipeline_run_id: pipeline_run_id
  13. )
  14. return unless context
  15. pipeline_state = context[:pipeline_state]
  16. if pipeline_state.pipeline_terminal?(run_id: pipeline_run_id) || pipeline_state.step_terminal?(run_id: pipeline_run_id, step: "video")
  17. enqueue_finalizer = false
  18. Ops::StructuredLogger.info(
  19. event: "ai.video_analysis.skipped_terminal",
  20. payload: {
  21. active_job_id: job_id,
  22. instagram_account_id: context[:account].id,
  23. instagram_profile_id: context[:profile].id,
  24. instagram_profile_post_id: context[:post].id,
  25. pipeline_run_id: pipeline_run_id
  26. }
  27. )
  28. return
  29. end
  # resource_available? has already recorded the deferral or exhaustion on the
  # pipeline state (and re-enqueued this job when deferring). enqueue_finalizer
  # is still true here, so the ensure block enqueues the finalizer on this path.
  30. unless resource_available?(defer_attempt: defer_attempt, context: context, pipeline_run_id: pipeline_run_id)
  31. return
  32. end
  33. profile = context[:profile]
  34. post = context[:post]
  35. pipeline_state.mark_step_running!(
  36. run_id: pipeline_run_id,
  37. step: "video",
  38. queue_name: queue_name,
  39. active_job_id: job_id
  40. )
  41. builder = Ai::PostAnalysisContextBuilder.new(profile: profile, post: post)
  42. payload = builder.video_payload
  # A skipped payload is still persisted (so the skip reason is stored on the
  # post) and the step counts as succeeded.
  43. if ActiveModel::Type::Boolean.new.cast(payload[:skipped])
  44. persist_video_analysis!(post: post, result: payload)
  45. pipeline_state.mark_step_completed!(
  46. run_id: pipeline_run_id,
  47. step: "video",
  48. status: "succeeded",
  49. result: {
  50. skipped: true,
  51. reason: payload[:reason].to_s
  52. }
  53. )
  54. return
  55. end
  56. result = Timeout.timeout(video_timeout_seconds) do
  57. PostVideoContextExtractionService.new.extract(
  58. video_bytes: payload[:video_bytes],
  # Falls back to a post-derived reference id when the payload has none.
  59. reference_id: payload[:reference_id].to_s.presence || "post_media_#{post.id}",
  60. content_type: payload[:content_type]
  61. )
  62. end
  63. persist_video_analysis!(post: post, result: result)
  # NOTE(review): this summary reads symbol keys only, while
  # persist_video_analysis! accepts symbol or string keys via value_for.
  # Confirm the extraction service returns symbol keys.
  64. pipeline_state.mark_step_completed!(
  65. run_id: pipeline_run_id,
  66. step: "video",
  67. status: "succeeded",
  68. result: {
  69. skipped: ActiveModel::Type::Boolean.new.cast(result[:skipped]),
  70. processing_mode: result[:processing_mode].to_s,
  71. static: ActiveModel::Type::Boolean.new.cast(result[:static]),
  72. semantic_route: result[:semantic_route].to_s.presence,
  73. duration_seconds: result[:duration_seconds],
  74. has_audio: ActiveModel::Type::Boolean.new.cast(result[:has_audio]),
  75. transcript_present: result[:transcript].to_s.present?,
  76. topics_count: Array(result[:topics]).length
  77. }
  78. )
  79. rescue StandardError => e
  # context is nil when load_pipeline_context! itself raised; hence the safe navigation.
  80. context&.dig(:pipeline_state)&.mark_step_completed!(
  81. run_id: pipeline_run_id,
  82. step: "video",
  83. status: "failed",
  84. error: format_error(e),
  85. result: {
  86. reason: "video_analysis_failed"
  87. }
  88. )
  89. raise
  90. ensure
  # Runs on success, failure and deferral; skipped only when no context was
  # loaded or the step was already terminal.
  91. if context && enqueue_finalizer
  92. enqueue_pipeline_finalizer(
  93. account: context[:account],
  94. profile: context[:profile],
  95. post: context[:post],
  96. pipeline_run_id: pipeline_run_id
  97. )
  98. end
  99. end
  100. private
  # Consults Ops::ResourceGuard before the video step runs.
  #
  # Returns true when the guard allows the task. Otherwise returns false after
  # either failing the step (defer_attempt has reached MAX_DEFER_ATTEMPTS) or
  # marking it queued and re-enqueueing this job with defer_attempt + 1 after
  # the guard's suggested delay (20 seconds when none is given).
  def resource_available?(defer_attempt:, context:, pipeline_run_id:)
    guard = Ops::ResourceGuard.allow_ai_task?(task: "video", queue_name: queue_name, critical: false)
    return true if ActiveModel::Type::Boolean.new.cast(guard[:allow])

    attempt = defer_attempt.to_i
    state = context[:pipeline_state]

    if attempt >= MAX_DEFER_ATTEMPTS
      state.mark_step_completed!(
        run_id: pipeline_run_id,
        step: "video",
        status: "failed",
        error: "resource_guard_exhausted: #{guard[:reason]}",
        result: {
          reason: "resource_constraints",
          snapshot: guard[:snapshot]
        }
      )
      return false
    end

    delay = guard[:retry_in_seconds].to_i
    delay = 20 unless delay.positive?

    state.mark_step_queued!(
      run_id: pipeline_run_id,
      step: "video",
      queue_name: queue_name,
      active_job_id: job_id,
      result: {
        reason: "resource_constrained",
        defer_attempt: attempt,
        retry_in_seconds: delay,
        snapshot: guard[:snapshot]
      }
    )

    requeue_args = {
      instagram_account_id: context[:account].id,
      instagram_profile_id: context[:profile].id,
      instagram_profile_post_id: context[:post].id,
      pipeline_run_id: pipeline_run_id,
      defer_attempt: attempt + 1
    }
    self.class.set(wait: delay.seconds).perform_later(**requeue_args)
    false
  end
  # Merges a video-analysis result into the post while holding a row lock.
  #
  # The result is normalised first, then the post is reloaded inside the lock
  # so the merge starts from current data. Video-specific values go under
  # "video_*" keys of analysis; topics, objects, hashtags and mentions are also
  # merged into the shared analysis lists; OCR text/blocks are copied only when
  # analysis has none yet. metadata["video_processing"] is replaced with a
  # compacted snapshot stamped with the current time.
  def persist_video_analysis!(post:, result:)
    normalized = normalize_video_result(result)
    boolean = ActiveModel::Type::Boolean.new

    post.with_lock do
      post.reload
      analysis = post.analysis.is_a?(Hash) ? post.analysis.deep_dup : {}
      metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}

      # normalize_video_result always emits every key, so a key? check can never
      # be false. Test for a real value instead, so an absent static flag or
      # duration does not store nil (or clear a value stored earlier).
      analysis["video_processing_mode"] = normalized[:processing_mode].to_s unless normalized[:processing_mode].nil?
      analysis["video_static_detected"] = boolean.cast(normalized[:static]) unless normalized[:static].nil?
      analysis["video_semantic_route"] = normalized[:semantic_route].to_s if normalized[:semantic_route].to_s.present?
      analysis["video_duration_seconds"] = normalized[:duration_seconds] unless normalized[:duration_seconds].nil?
      analysis["video_context_summary"] = normalized[:context_summary].to_s if normalized[:context_summary].to_s.present?
      analysis["transcript"] = normalized[:transcript].to_s if normalized[:transcript].to_s.present?
      analysis["video_topics"] = normalized[:topics] if normalized[:topics].is_a?(Array)
      analysis["video_objects"] = normalized[:objects] if normalized[:objects].is_a?(Array)
      analysis["video_scenes"] = normalized[:scenes] if normalized[:scenes].is_a?(Array)
      analysis["video_hashtags"] = normalized[:hashtags] if normalized[:hashtags].is_a?(Array)
      analysis["video_mentions"] = normalized[:mentions] if normalized[:mentions].is_a?(Array)
      analysis["video_profile_handles"] = normalized[:profile_handles] if normalized[:profile_handles].is_a?(Array)
      analysis["video_ocr_text"] = normalized[:ocr_text].to_s if normalized[:ocr_text].to_s.present?
      analysis["video_ocr_blocks"] = normalized[:ocr_blocks] if normalized[:ocr_blocks].is_a?(Array)

      # Fold video findings into the shared lists, deduplicated and capped.
      analysis["topics"] = merge_strings(analysis["topics"], normalized[:topics], limit: 40)
      analysis["objects"] = merge_strings(analysis["objects"], normalized[:objects], limit: 50)
      analysis["hashtags"] = merge_strings(analysis["hashtags"], normalized[:hashtags], limit: 50)
      analysis["mentions"] = merge_strings(analysis["mentions"], normalized[:mentions], limit: 50)

      # Video OCR only fills gaps; existing OCR text/blocks are kept.
      if analysis["ocr_text"].to_s.blank? && normalized[:ocr_text].to_s.present?
        analysis["ocr_text"] = normalized[:ocr_text].to_s
      end
      if Array(analysis["ocr_blocks"]).empty? && normalized[:ocr_blocks].is_a?(Array)
        analysis["ocr_blocks"] = normalized[:ocr_blocks].first(40)
      end

      metadata["video_processing"] = {
        "skipped" => boolean.cast(normalized[:skipped]),
        "processing_mode" => normalized[:processing_mode].to_s,
        "static" => boolean.cast(normalized[:static]),
        "semantic_route" => normalized[:semantic_route].to_s.presence,
        "duration_seconds" => normalized[:duration_seconds],
        "has_audio" => boolean.cast(normalized[:has_audio]),
        "transcript" => normalized[:transcript].to_s.presence,
        "topics" => normalized[:topics],
        "objects" => normalized[:objects],
        "scenes" => normalized[:scenes],
        "hashtags" => normalized[:hashtags],
        "mentions" => normalized[:mentions],
        "profile_handles" => normalized[:profile_handles],
        "ocr_text" => normalized[:ocr_text].to_s.presence,
        "ocr_blocks" => normalized[:ocr_blocks],
        "context_summary" => normalized[:context_summary].to_s.presence,
        "metadata" => normalized[:metadata],
        "updated_at" => Time.current.iso8601(3)
      }.compact

      post.update!(analysis: analysis, metadata: metadata)
    end
  end
  # Converts a raw video result into a hash with a fixed set of symbol keys.
  #
  # Values are looked up under symbol keys first, then string keys; a non-Hash
  # input is treated as empty. String lists are stripped, deduplicated and
  # capped; scenes and ocr_blocks keep only Hash entries. processing_mode
  # defaults to "dynamic_video"; metadata falls back to a hash holding the
  # result's reason.
  def normalize_video_result(result)
    row = result.is_a?(Hash) ? result : {}
    read = ->(key) { value_for(row, key) }
    string_list = ->(key, cap) { normalized_strings(read.call(key), limit: cap) }
    hash_list = ->(key, cap) { Array(read.call(key)).select { |entry| entry.is_a?(Hash) }.first(cap) }

    {
      skipped: read.call(:skipped),
      processing_mode: read.call(:processing_mode).to_s.presence || "dynamic_video",
      static: read.call(:static),
      semantic_route: read.call(:semantic_route),
      duration_seconds: read.call(:duration_seconds),
      has_audio: read.call(:has_audio),
      transcript: read.call(:transcript),
      topics: string_list.call(:topics, 40),
      objects: string_list.call(:objects, 50),
      scenes: hash_list.call(:scenes, 50),
      hashtags: string_list.call(:hashtags, 50),
      mentions: string_list.call(:mentions, 50),
      profile_handles: string_list.call(:profile_handles, 50),
      ocr_text: read.call(:ocr_text),
      ocr_blocks: hash_list.call(:ocr_blocks, 80),
      context_summary: read.call(:context_summary),
      metadata: row[:metadata] || row["metadata"] || { reason: row[:reason] || row["reason"] }
    }
  end
  # Coerces values to an array of stripped strings, drops blank entries,
  # removes duplicates and keeps at most `limit` items.
  def normalized_strings(values, limit:)
    stripped = Array(values).map { |value| value.to_s.strip }
    stripped.reject(&:blank?).uniq.first(limit)
  end
  # Concatenates two list-like values (existing first) and normalises the
  # combined list through normalized_strings with the given limit.
  def merge_strings(existing, incoming, limit:)
    combined = Array(existing) + Array(incoming)
    normalized_strings(combined, limit: limit)
  end
  # Reads `key` from row, trying the key as given and then its string form.
  # Returns the stored value (even nil or false) for the first key present,
  # or nil when neither key exists.
  def value_for(row, key)
    [key, key.to_s].each do |candidate|
      return row[candidate] if row.key?(candidate)
    end
    nil
  end
  # Timeout budget, in seconds, for one video extraction call.
  # Read from AI_VIDEO_TIMEOUT_SECONDS (default 180) and clamped to 20..420.
  def video_timeout_seconds
    configured = ENV.fetch("AI_VIDEO_TIMEOUT_SECONDS", 180).to_i
    configured.clamp(20, 420)
  end
  229. end

app/jobs/process_post_visual_analysis_job.rb

0.0% lines covered

100.0% branches covered

186 relevant lines. 0 lines covered and 186 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "timeout"
  2. class ProcessPostVisualAnalysisJob < PostAnalysisPipelineJob
  3. queue_as :ai_visual_queue
  4. MAX_VISUAL_ATTEMPTS = ENV.fetch("AI_VISUAL_MAX_ATTEMPTS", 6).to_i.clamp(1, 20)
  5. retry_on Net::OpenTimeout, Net::ReadTimeout, wait: :polynomially_longer, attempts: 3
  6. retry_on Errno::ECONNRESET, Errno::ECONNREFUSED, wait: :polynomially_longer, attempts: 3
  7. retry_on Timeout::Error, wait: :polynomially_longer, attempts: 2
  # Runs the "visual" step of the post analysis pipeline for one post.
  #
  # Loads the pipeline context, skips when the run or the step is already
  # terminal, fails the step when visual_attempts_exhausted? says so, then
  # builds the payload and media and calls Ai::Runner#analyze! inside a timeout
  # with faces, OCR and comment generation switched off. The provider, model
  # and analysis are written to the post and the step outcome is recorded.
  #
  # Raises: a StandardError is recorded as a failed "visual" step and logged;
  # it is re-raised only when retryable_visual_error? returns true for it.
  8. def perform(instagram_account_id:, instagram_profile_id:, instagram_profile_post_id:, pipeline_run_id:)
  # Cleared only on the terminal-skip path below; the ensure block checks it.
  9. enqueue_finalizer = true
  10. context = load_pipeline_context!(
  11. instagram_account_id: instagram_account_id,
  12. instagram_profile_id: instagram_profile_id,
  13. instagram_profile_post_id: instagram_profile_post_id,
  14. pipeline_run_id: pipeline_run_id
  15. )
  16. return unless context
  17. account = context[:account]
  18. profile = context[:profile]
  19. post = context[:post]
  20. pipeline_state = context[:pipeline_state]
  # Start time for the duration_ms reported on success; nil if the clock read
  # raises, in which case duration_ms is reported as nil.
  21. started_monotonic = Process.clock_gettime(Process::CLOCK_MONOTONIC) rescue nil
  22. if pipeline_state.pipeline_terminal?(run_id: pipeline_run_id) || pipeline_state.step_terminal?(run_id: pipeline_run_id, step: "visual")
  23. enqueue_finalizer = false
  24. Ops::StructuredLogger.info(
  25. event: "ai.visual_analysis.skipped_terminal",
  26. payload: {
  27. active_job_id: job_id,
  28. instagram_account_id: account.id,
  29. instagram_profile_id: profile.id,
  30. instagram_profile_post_id: post.id,
  31. pipeline_run_id: pipeline_run_id
  32. }
  33. )
  34. return
  35. end
  # Attempt budget spent: fail the step without calling the provider.
  # enqueue_finalizer stays true, so the ensure block still enqueues the finalizer.
  36. if visual_attempts_exhausted?(pipeline_state: pipeline_state, pipeline_run_id: pipeline_run_id)
  37. pipeline_state.mark_step_completed!(
  38. run_id: pipeline_run_id,
  39. step: "visual",
  40. status: "failed",
  41. error: "visual_attempts_exhausted",
  42. result: {
  43. reason: "visual_attempts_exhausted",
  44. max_attempts: MAX_VISUAL_ATTEMPTS
  45. }
  46. )
  47. Ops::StructuredLogger.warn(
  48. event: "ai.visual_analysis.exhausted",
  49. payload: {
  50. active_job_id: job_id,
  51. instagram_account_id: account.id,
  52. instagram_profile_id: profile.id,
  53. instagram_profile_post_id: post.id,
  54. pipeline_run_id: pipeline_run_id,
  55. max_attempts: MAX_VISUAL_ATTEMPTS
  56. }
  57. )
  58. return
  59. end
  60. pipeline_state.mark_step_running!(
  61. run_id: pipeline_run_id,
  62. step: "visual",
  63. queue_name: queue_name,
  64. active_job_id: job_id
  65. )
  66. builder = Ai::PostAnalysisContextBuilder.new(profile: profile, post: post)
  67. payload = builder.payload
  68. media = builder.media_payload
  69. fingerprint = builder.media_fingerprint(media: media)
  70. media_summary = media_context(media: media)
  # A media type of "none" is only logged; the analysis below still runs.
  71. if media_summary[:media_type] == "none"
  72. Ops::StructuredLogger.warn(
  73. event: "ai.visual_analysis.media_skipped",
  74. payload: {
  75. active_job_id: job_id,
  76. instagram_account_id: account.id,
  77. instagram_profile_id: profile.id,
  78. instagram_profile_post_id: post.id,
  79. pipeline_run_id: pipeline_run_id,
  80. reason: media_summary[:reason],
  81. media_content_type: media_summary[:media_content_type]
  82. }
  83. )
  84. end
  85. run = Timeout.timeout(visual_timeout_seconds) do
  86. Ai::Runner.new(account: account).analyze!(
  87. purpose: "post",
  88. analyzable: post,
  89. payload: payload,
  90. media: media,
  91. media_fingerprint: fingerprint,
  92. provider_options: {
  93. visual_only: true,
  94. include_faces: false,
  95. include_ocr: false,
  96. include_comment_generation: false
  97. }
  98. )
  99. end
  100. duration_ms =
  101. if started_monotonic
  102. ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_monotonic) * 1000).round
  103. end
  # NOTE(review): this replaces the whole analysis column with the provider's
  # analysis and leaves ai_status as "running". Presumably a later pipeline
  # stage sets the final status; confirm, and confirm that analysis keys
  # written by other steps are not expected to survive this write.
  104. post.update!(
  105. ai_provider: run[:provider].key,
  106. ai_model: run.dig(:result, :model),
  107. analysis: run.dig(:result, :analysis),
  108. ai_status: "running"
  109. )
  110. pipeline_state.mark_step_completed!(
  111. run_id: pipeline_run_id,
  112. step: "visual",
  113. status: "succeeded",
  114. result: {
  115. provider: run[:provider].key,
  116. model: run.dig(:result, :model),
  117. ai_analysis_id: run[:record]&.id,
  118. cache_hit: ActiveModel::Type::Boolean.new.cast(run[:cached]),
  119. media_type: media_summary[:media_type],
  120. media_content_type: media_summary[:media_content_type],
  121. media_source: media_summary[:media_source],
  122. media_byte_size: media_summary[:media_byte_size],
  123. duration_ms: duration_ms
  124. }
  125. )
  126. rescue StandardError => e
  # context is nil when load_pipeline_context! itself raised; hence the safe navigation.
  127. context&.dig(:pipeline_state)&.mark_step_completed!(
  128. run_id: pipeline_run_id,
  129. step: "visual",
  130. status: "failed",
  131. error: format_error(e),
  132. result: {
  133. reason: "visual_analysis_failed"
  134. }
  135. )
  136. Ops::StructuredLogger.warn(
  137. event: "ai.visual_analysis.failed",
  138. payload: {
  139. active_job_id: job_id,
  # Fall back to the raw job arguments when the context never loaded.
  140. instagram_account_id: context&.dig(:account)&.id || instagram_account_id,
  141. instagram_profile_id: context&.dig(:profile)&.id || instagram_profile_id,
  142. instagram_profile_post_id: context&.dig(:post)&.id || instagram_profile_post_id,
  143. pipeline_run_id: pipeline_run_id,
  144. error_class: e.class.name,
  # The message is truncated to 280 bytes for the log payload.
  145. error_message: e.message.to_s.byteslice(0, 280),
  146. retryable: retryable_visual_error?(e)
  147. }
  148. )
  # Errors that are not retryable are swallowed after being recorded, so the
  # job finishes normally instead of propagating them.
  149. raise if retryable_visual_error?(e)
  150. ensure
  # Runs on success, failure and exhaustion; skipped only when no context was
  # loaded or the step was already terminal.
  151. if context && enqueue_finalizer
  152. enqueue_pipeline_finalizer(
  153. account: context[:account],
  154. profile: context[:profile],
  155. post: context[:post],
  156. pipeline_run_id: pipeline_run_id
  157. )
  158. end
  159. end
  160. private
  # Reports whether the "visual" step of a pipeline run has used up its
  # attempt budget.
  #
  # @param pipeline_state [#step_state] object queried for the step's stored state
  # @param pipeline_run_id [Object] identifier forwarded as +run_id+
  # @return [Boolean] true once the recorded "attempts" value reaches MAX_VISUAL_ATTEMPTS
  def visual_attempts_exhausted?(pipeline_state:, pipeline_run_id:)
    visual_step = pipeline_state.step_state(run_id: pipeline_run_id, step: "visual")
    # A nil step state or a missing "attempts" key both coerce to zero attempts.
    recorded_attempts = visual_step.to_h["attempts"].to_i
    MAX_VISUAL_ATTEMPTS <= recorded_attempts
  end
  # Classifies an exception as transient (worth retrying) or permanent.
  #
  # @param error [Exception] exception raised while running the visual step
  # @return [Boolean] true for timeouts and connection reset/refused errors, false otherwise
  def retryable_visual_error?(error)
    case error
    when Timeout::Error, Net::OpenTimeout, Net::ReadTimeout, Errno::ECONNRESET, Errno::ECONNREFUSED
      true
    else
      false
    end
  end
  # Summarises a media payload into a flat hash for logging and step results.
  #
  # @param media [Hash, Object] media payload; anything that is not a Hash is
  #   treated as an empty payload
  # @return [Hash] with keys :media_type (defaults to "none"), :media_content_type,
  #   :media_source, :media_byte_size and :reason; blank values become nil
  def media_context(media:)
    attrs = media.is_a?(Hash) ? media : {}
    raw_bytes = attrs[:bytes]

    {
      media_type: attrs[:type].to_s.presence || "none",
      media_content_type: attrs[:content_type].to_s.presence,
      media_source: attrs[:source].to_s.presence,
      # Only objects that expose #bytesize contribute a size.
      media_byte_size: (raw_bytes.bytesize if raw_bytes.respond_to?(:bytesize)),
      reason: attrs[:reason].to_s.presence
    }
  end
  # Timeout (in seconds) applied to a single visual analysis run.
  #
  # Reads AI_VISUAL_TIMEOUT_SECONDS from the environment (210 when unset) and
  # bounds the integer value to the 30..600 range.
  #
  # @return [Integer]
  def visual_timeout_seconds
    configured = ENV.fetch("AI_VISUAL_TIMEOUT_SECONDS") { 210 }
    [[configured.to_i, 30].max, 600].min
  end
  186. end

app/jobs/purge_expired_instagram_post_media_job.rb

0.0% lines covered

100.0% branches covered

21 relevant lines. 0 lines covered and 21 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Purges attached media for Instagram posts whose +purge_at+ deadline has
  # passed, then clears +purge_at+ so the post is not picked up again.
  class PurgeExpiredInstagramPostMediaJob < ApplicationJob
    queue_as :post_downloads

    # @param opts [Hash, nil] optional positional options hash; ignored unless it is a Hash
    # @param kwargs [Hash] keyword options, taking precedence over +opts+.
    #   Recognised key: +:limit+ (default 200, clamped to 1..2000).
    # @return [void]
    def perform(opts = nil, **kwargs)
      params = normalize_params(opts, kwargs, limit: 200)
      batch_size = params[:limit].to_i.clamp(1, 2000)

      expired_posts = InstagramPost
        .where("purge_at IS NOT NULL AND purge_at <= ?", Time.current)
        .order(purge_at: :asc)
        .limit(batch_size)

      # Plain #each instead of #find_each: batch iteration forces primary-key
      # ordering and ignores the purge_at ordering, so the limited window would
      # not be the oldest-expired posts. The result set is bounded by the limit.
      expired_posts.each do |post|
        purge_media(post)
        post.update_columns(purge_at: nil) # avoid reprocessing
      end
    end

    private

    # Best-effort purge: a failure is logged and swallowed so one bad
    # attachment does not abort the rest of the run.
    def purge_media(post)
      post.media.purge if post.media.attached?
    rescue StandardError => e
      Rails.logger.warn("[PurgeExpiredInstagramPostMediaJob] media purge failed for post_id=#{post.id}: #{e.class}: #{e.message}")
      nil
    end

    # Merges defaults, the optional positional hash and keyword options
    # (later sources win) into a symbol-keyed hash.
    def normalize_params(opts, kwargs, defaults)
      from_opts = opts.is_a?(Hash) ? opts.symbolize_keys : {}
      defaults.merge(from_opts).merge(kwargs.symbolize_keys)
    end
  end

app/jobs/refresh_account_audit_logs_job.rb

0.0% lines covered

100.0% branches covered

35 relevant lines. 0 lines covered and 35 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Rebuilds the audit-log section for an account and pushes it to the
  # account's Turbo stream. Enqueueing is throttled per account via Rails.cache.
  class RefreshAccountAuditLogsJob < ApplicationJob
    THROTTLE_SECONDS = 2.0
    THROTTLE_EXPIRY = 30.seconds

    queue_as :maintenance

    class << self
      # Enqueues a refresh unless one was enqueued for the same account within
      # the last THROTTLE_SECONDS. Any error in the throttle path falls back to
      # enqueueing directly.
      #
      # @param instagram_account_id [#to_i] account id; non-positive ids are ignored
      # @param limit [Integer] forwarded to #perform
      def enqueue_for(instagram_account_id:, limit: 120)
        account_id = instagram_account_id.to_i
        return unless account_id.positive?

        enqueued_at = Time.current.to_f
        cache_key = throttle_key(account_id)
        previous = Rails.cache.read(cache_key).to_f
        throttled = previous.positive? && enqueued_at - previous < THROTTLE_SECONDS
        return if throttled

        Rails.cache.write(cache_key, enqueued_at, expires_in: THROTTLE_EXPIRY)
        perform_later(instagram_account_id: account_id, limit: limit)
      rescue StandardError
        perform_later(instagram_account_id: account_id, limit: limit)
      end

      private

      # Cache key holding the timestamp of the last enqueue for an account.
      def throttle_key(account_id)
        "jobs:refresh_account_audit_logs:last_enqueued:#{account_id}"
      end
    end

    # @param instagram_account_id [Integer] account to refresh; missing accounts are a no-op
    # @param limit [#to_i] number of audit entries, clamped to 20..250
    # @return [Object, nil] nil when the account is missing or any error was logged
    def perform(instagram_account_id:, limit: 120)
      account = InstagramAccount.find_by(id: instagram_account_id)
      return if account.nil?

      entry_limit = limit.to_i.clamp(20, 250)
      audit_entries = Ops::AuditLogBuilder.for_account(instagram_account: account, limit: entry_limit)

      Turbo::StreamsChannel.broadcast_replace_to(
        account,
        target: "account_audit_logs_section",
        partial: "instagram_accounts/audit_logs_section",
        locals: { recent_audit_entries: audit_entries }
      )
    rescue StandardError => error
      # Failures are logged, never re-raised.
      Rails.logger.warn("[RefreshAccountAuditLogsJob] failed for account_id=#{instagram_account_id}: #{error.class}: #{error.message}")
      nil
    end
  end

app/jobs/refresh_profile_post_face_identity_job.rb

0.0% lines covered

100.0% branches covered

74 relevant lines. 0 lines covered and 74 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "timeout"
  # Re-runs face recognition for a single profile post and records the
  # progress/result under metadata["history_build"]["face_refresh"].
  class RefreshProfilePostFaceIdentityJob < ApplicationJob
    queue_as :ai_face_queue

    # Rescue handlers are matched last-declared-first, and Net::OpenTimeout /
    # Net::ReadTimeout inherit from Timeout::Error. The generic Timeout::Error
    # policy is therefore declared FIRST so the more specific network policies
    # below (3 attempts) are not shadowed by it (2 attempts).
    retry_on Timeout::Error, wait: :polynomially_longer, attempts: 2
    retry_on Net::OpenTimeout, Net::ReadTimeout, wait: :polynomially_longer, attempts: 3
    retry_on Errno::ECONNRESET, Errno::ECONNREFUSED, wait: :polynomially_longer, attempts: 3

    # @param instagram_account_id [Integer]
    # @param instagram_profile_id [Integer] looked up within the account
    # @param instagram_profile_post_id [Integer] looked up within the profile
    # @param trigger_source [#to_s] recorded in the state; blank falls back to "profile_history_build"
    # @return [void] returns early when any record is missing or the post has no media
    # @raise [StandardError] re-raised after the "failed" state has been recorded
    def perform(instagram_account_id:, instagram_profile_id:, instagram_profile_post_id:, trigger_source: "profile_history_build")
      account = InstagramAccount.find_by(id: instagram_account_id)
      return unless account

      profile = account.instagram_profiles.find_by(id: instagram_profile_id)
      return unless profile

      post = profile.instagram_profile_posts.find_by(id: instagram_profile_post_id)
      return unless post && post.media.attached?

      mark_face_refresh_state!(
        post: post,
        attributes: {
          "status" => "running",
          "started_at" => Time.current.iso8601(3),
          "trigger_source" => trigger_source.to_s.presence || "profile_history_build",
          "active_job_id" => job_id,
          "queue_name" => queue_name
        }
      )

      result = Timeout.timeout(face_refresh_timeout_seconds) do
        PostFaceRecognitionService.new.process!(post: post)
      end

      mark_face_refresh_state!(
        post: post,
        attributes: {
          "status" => "completed",
          "finished_at" => Time.current.iso8601(3),
          "result" => {
            "skipped" => ActiveModel::Type::Boolean.new.cast(result[:skipped]),
            "reason" => result[:reason].to_s.presence,
            "face_count" => result[:face_count].to_i,
            "linked_face_count" => result[:linked_face_count].to_i,
            "low_confidence_filtered_count" => result[:low_confidence_filtered_count].to_i,
            "matched_people_count" => Array(result[:matched_people]).length
          }.compact
        }
      )
    rescue StandardError => e
      # `post` is nil when the failure happened before the lookup succeeded.
      if post&.persisted?
        mark_face_refresh_state!(
          post: post,
          attributes: {
            "status" => "failed",
            "failed_at" => Time.current.iso8601(3),
            "error_class" => e.class.name,
            "error_message" => e.message.to_s.byteslice(0, 280)
          }
        )
      end
      raise
    end

    private

    # Timeout for one recognition run, read from
    # PROFILE_HISTORY_FACE_REFRESH_TIMEOUT_SECONDS (default 180) and clamped to 20..420.
    def face_refresh_timeout_seconds
      ENV.fetch("PROFILE_HISTORY_FACE_REFRESH_TIMEOUT_SECONDS", "180").to_i.clamp(20, 420)
    end

    # Merges +attributes+ (nil values dropped) into the post's
    # metadata["history_build"]["face_refresh"] hash under a row lock.
    # Best-effort: any error is swallowed and nil is returned so that state
    # bookkeeping never fails the job by itself.
    def mark_face_refresh_state!(post:, attributes:)
      post.with_lock do
        metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
        history = metadata["history_build"].is_a?(Hash) ? metadata["history_build"].deep_dup : {}
        state = history["face_refresh"].is_a?(Hash) ? history["face_refresh"].deep_dup : {}
        state.merge!(attributes.to_h.compact)
        history["face_refresh"] = state
        history["updated_at"] = Time.current.iso8601(3)
        metadata["history_build"] = history
        post.update!(metadata: metadata)
      end
    rescue StandardError
      nil
    end
  end

app/jobs/retry_failed_background_jobs_job.rb

0.0% lines covered

100.0% branches covered

16 relevant lines. 0 lines covered and 16 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Periodic job that hands failed background jobs to Jobs::FailureRetry for
  # automatic re-enqueueing.
  class RetryFailedBackgroundJobsJob < ApplicationJob
    queue_as :sync

    # @param opts [Hash, nil] optional positional options hash; ignored unless it is a Hash
    # @param kwargs [Hash] keyword options, taking precedence over +opts+.
    #   Recognised keys: +:limit+ (20), +:max_attempts+ (3),
    #   +:cooldown_minutes+ (10, clamped to 1..120).
    def perform(opts = nil, **kwargs)
      settings = normalize_params(opts, kwargs, limit: 20, max_attempts: 3, cooldown_minutes: 10)
      cooldown_minutes = settings[:cooldown_minutes].to_i.clamp(1, 120)

      Jobs::FailureRetry.enqueue_automatic_retries!(
        limit: settings[:limit],
        max_attempts: settings[:max_attempts],
        cooldown: cooldown_minutes.minutes
      )
    end

    private

    # Layers defaults, the positional hash and keyword options (later wins).
    def normalize_params(opts, kwargs, defaults)
      positional = opts.is_a?(Hash) ? opts.symbolize_keys : {}
      defaults.merge(positional, kwargs.symbolize_keys)
    end
  end

app/jobs/send_instagram_message_job.rb

0.0% lines covered

100.0% branches covered

31 relevant lines. 0 lines covered and 31 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Sends a stored InstagramMessage through Instagram::Client and keeps the
  # message row (status, timestamps, error) and its Turbo stream in sync.
  class SendInstagramMessageJob < ApplicationJob
    queue_as :messages

    # @param instagram_account_id [Integer] account that sends the message
    # @param instagram_message_id [Integer] message to send; must belong to the account
    # @raise [RuntimeError] "Message/account mismatch" when the message belongs to another account
    # @raise [StandardError] any send/update error, re-raised after the message is marked "failed"
    def perform(instagram_account_id:, instagram_message_id:)
      message = InstagramMessage.find(instagram_message_id)
      account = InstagramAccount.find(instagram_account_id)
      raise "Message/account mismatch" unless message.instagram_account_id == account.id

      message.update!(status: "queued", error_message: nil)
      broadcast_message(account: account, message: message)

      Instagram::Client.new(account: account).send_message_to_user!(
        username: message.instagram_profile.username,
        message_text: message.body
      )

      message.update!(status: "sent", sent_at: Time.current)
      broadcast_message(account: account, message: message)
    rescue StandardError => e
      # Either lookup may have failed before assignment; retry without raising.
      account ||= InstagramAccount.where(id: instagram_account_id).first
      message ||= InstagramMessage.where(id: instagram_message_id).first
      message&.update!(status: "failed", error_message: e.message)
      # Never render a message row onto the stream of an account that does not
      # own it (this path is also reached via the mismatch guard above).
      owned = account && message && message.instagram_account_id == account.id
      broadcast_message(account: account, message: message) if owned
      raise
    end

    private

    # Replaces the message's row in the account's Turbo stream.
    def broadcast_message(account:, message:)
      Turbo::StreamsChannel.broadcast_replace_to(
        account,
        target: ActionView::RecordIdentifier.dom_id(message),
        partial: "instagram_messages/row",
        locals: { message: message }
      )
    end
  end

app/jobs/story_processing_job.rb

0.0% lines covered

100.0% branches covered

7 relevant lines. 0 lines covered and 7 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Runs StoryProcessingService for one stored Instagram story.
  class StoryProcessingJob < ApplicationJob
    queue_as :frame_generation

    # @param instagram_story_id [Integer] id passed to InstagramStory.find
    # @param force [Boolean] forwarded to the service unchanged
    def perform(instagram_story_id:, force: false)
      processor = StoryProcessingService.new(
        story: InstagramStory.find(instagram_story_id),
        force: force
      )
      processor.process!
    end
  end

app/jobs/sync_all_home_stories_job.rb

0.0% lines covered

100.0% branches covered

53 relevant lines. 0 lines covered and 53 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Repeatedly syncs the home story carousel in batches until the carousel is
  # depleted, two consecutive cycles move no work, or MAX_CYCLES is reached,
  # then broadcasts a summary notification to the account.
  class SyncAllHomeStoriesJob < ApplicationJob
    queue_as :story_downloads
    MAX_CYCLES = 30

    # @param instagram_account_id [Integer]
    # @param cycle_story_limit [#to_i] stories per cycle, clamped to
    #   1..SyncHomeStoryCarouselJob::STORY_BATCH_LIMIT
    # @raise [StandardError] re-raised after a failure alert has been broadcast
    def perform(instagram_account_id:, cycle_story_limit: SyncHomeStoryCarouselJob::STORY_BATCH_LIMIT)
      account = InstagramAccount.find(instagram_account_id)
      batch_limit = cycle_story_limit.to_i.clamp(1, SyncHomeStoryCarouselJob::STORY_BATCH_LIMIT)
      totals = Hash.new(0)
      cycles = 0
      idle_cycles = 0
      stop_reason = "max_cycles_reached"

      1.upto(MAX_CYCLES) do |cycle_number|
        cycles = cycle_number
        result = Instagram::Client.new(account: account).sync_home_story_carousel!(story_limit: batch_limit, auto_reply_only: false)
        merge_totals!(totals, result)

        # A cycle is "idle" when nothing was downloaded, commented or analyzed.
        progress = %i[downloaded commented analyzed].sum { |counter| result[counter].to_i }
        if progress.zero?
          idle_cycles += 1
        else
          idle_cycles = 0
        end

        early_exit =
          if result[:stories_visited].to_i < batch_limit
            "depleted_before_batch_limit"
          elsif idle_cycles >= 2
            "no_new_work_for_two_cycles"
          end
        next unless early_exit

        stop_reason = early_exit
        break
      end

      reported_counters = {
        "visited" => :stories_visited,
        "downloaded" => :downloaded,
        "analyzed" => :analyzed,
        "commented" => :commented,
        "reacted" => :reacted,
        "skipped_ads" => :skipped_ads,
        "skipped_unreplyable" => :skipped_unreplyable,
        "skipped_interaction_retry" => :skipped_interaction_retry,
        "skipped_reshared_external_link" => :skipped_reshared_external_link,
        "failed" => :failed
      }
      details = reported_counters.map { |label, key| "#{label}=#{totals[key]}" }
      message = "Continuous story sync done: " + ["cycles=#{cycles}", "reason=#{stop_reason}", *details].join(", ") + "."
      kind = totals[:failed].to_i.positive? ? "alert" : "notice"

      Turbo::StreamsChannel.broadcast_append_to(
        account,
        target: "notifications",
        partial: "shared/notification",
        locals: { kind: kind, message: message }
      )
    rescue StandardError => error
      # The account lookup itself may have been what failed.
      account ||= InstagramAccount.where(id: instagram_account_id).first
      if account
        Turbo::StreamsChannel.broadcast_append_to(
          account,
          target: "notifications",
          partial: "shared/notification",
          locals: { kind: "alert", message: "Continuous story sync failed: #{error.message}" }
        )
      end
      raise
    end

    private

    # Adds one cycle's counters from +result+ into the running +totals+.
    def merge_totals!(totals, result)
      counter_keys = %i[
        stories_visited downloaded analyzed commented reacted skipped_video skipped_not_tagged
        skipped_ads skipped_invalid_media skipped_unreplyable skipped_interaction_retry skipped_reshared_external_link skipped_out_of_network failed
      ]
      counter_keys.each { |key| totals[key] += result[key].to_i }
    end
  end

app/jobs/sync_follow_graph_job.rb

0.0% lines covered

100.0% branches covered

46 relevant lines. 0 lines covered and 46 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Runs a follow-graph sync for an account, tracking progress on a sync run
  # record and broadcasting status and notifications to the account's stream.
  class SyncFollowGraphJob < ApplicationJob
    queue_as :sync
    retry_on Selenium::WebDriver::Error::StaleElementReferenceError, wait: 3.seconds, attempts: 3

    # @param instagram_account_id [Integer]
    # @param sync_run_id [Integer] looked up within the account's sync runs
    # @raise [StandardError] re-raised after the run is marked "failed" and an alert is broadcast
    def perform(instagram_account_id:, sync_run_id:)
      account = InstagramAccount.find(instagram_account_id)
      sync_run = account.sync_runs.find(sync_run_id)

      sync_run.update!(status: "running", started_at: Time.current, error_message: nil)
      broadcast_status(account: account, sync_run: sync_run)

      stats = Instagram::Client.new(account: account).sync_follow_graph!

      sync_run.update!(status: "succeeded", finished_at: Time.current, stats: stats)
      broadcast_status(account: account, sync_run: sync_run)
      success_text = "Follow graph sync complete: #{stats[:profiles_total]} profiles (mutuals: #{stats[:mutuals]})."
      broadcast_notice(account: account, message: success_text)
    rescue StandardError => error
      # Either lookup may have failed before assignment; retry without raising.
      account ||= InstagramAccount.where(id: instagram_account_id).first
      sync_run ||= account&.sync_runs&.where(id: sync_run_id)&.first
      sync_run&.update!(status: "failed", finished_at: Time.current, error_message: error.message)
      if account
        broadcast_status(account: account, sync_run: sync_run) if sync_run
        broadcast_alert(account: account, message: "Follow graph sync failed: #{error.message}")
      end
      raise
    end

    private

    # Replaces the "sync_status" target with the current state of the run.
    def broadcast_status(account:, sync_run:)
      Turbo::StreamsChannel.broadcast_replace_to(
        account,
        target: "sync_status",
        partial: "sync_runs/status",
        locals: { sync_run: sync_run }
      )
    end

    # Appends an informational notification.
    def broadcast_notice(account:, message:)
      append_notification(account: account, kind: "notice", message: message)
    end

    # Appends an alert notification.
    def broadcast_alert(account:, message:)
      append_notification(account: account, kind: "alert", message: message)
    end

    # Shared implementation behind #broadcast_notice and #broadcast_alert.
    def append_notification(account:, kind:, message:)
      Turbo::StreamsChannel.broadcast_append_to(
        account,
        target: "notifications",
        partial: "shared/notification",
        locals: { kind: kind, message: message }
      )
    end
  end

app/jobs/sync_home_story_carousel_job.rb

0.0% lines covered

100.0% branches covered

42 relevant lines. 0 lines covered and 42 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Syncs one batch of the home story carousel and broadcasts a summary
  # notification (alert when nothing was visited or any story failed).
  class SyncHomeStoryCarouselJob < ApplicationJob
    queue_as :story_downloads
    STORY_BATCH_LIMIT = 10

    retry_on Net::OpenTimeout, Net::ReadTimeout, wait: :polynomially_longer, attempts: 3
    retry_on Errno::ECONNRESET, Errno::ECONNREFUSED, wait: :polynomially_longer, attempts: 3
    # Only register the Selenium retry when the constant can be resolved.
    selenium_timeout_error = "Selenium::WebDriver::Error::TimeoutError".safe_constantize
    retry_on selenium_timeout_error, wait: :polynomially_longer, attempts: 2 if selenium_timeout_error

    # @param instagram_account_id [Integer]
    # @param story_limit [#to_i] clamped to 1..STORY_BATCH_LIMIT
    # @param auto_reply_only [Object] boolean-cast and forwarded to the client
    # @raise [StandardError] re-raised after a failure alert has been broadcast
    def perform(instagram_account_id:, story_limit: STORY_BATCH_LIMIT, auto_reply_only: false)
      account = InstagramAccount.find(instagram_account_id)
      limit = story_limit.to_i.clamp(1, STORY_BATCH_LIMIT)
      tagged_only = ActiveModel::Type::Boolean.new.cast(auto_reply_only)

      result = Instagram::Client.new(account: account).sync_home_story_carousel!(
        story_limit: limit,
        auto_reply_only: tagged_only
      )

      has_failure = result[:stories_visited].to_i.zero? || result[:failed].to_i.positive?
      notification = {
        kind: has_failure ? "alert" : "notice",
        message: summary_message(result: result, has_failure: has_failure)
      }

      Turbo::StreamsChannel.broadcast_append_to(
        account,
        target: "notifications",
        partial: "shared/notification",
        locals: notification
      )
    rescue StandardError => error
      # The account lookup itself may have been what failed.
      account ||= InstagramAccount.where(id: instagram_account_id).first
      if account
        Turbo::StreamsChannel.broadcast_append_to(
          account,
          target: "notifications",
          partial: "shared/notification",
          locals: { kind: "alert", message: "Home story sync failed: #{error.message}" }
        )
      end
      raise
    end

    private

    # Builds the notification text; the failure variant additionally reports
    # the failed counter right after the visited counter.
    def summary_message(result:, has_failure:)
      shared_counters = %i[
        downloaded analyzed commented reacted skipped_video skipped_ads skipped_invalid_media
        skipped_unreplyable skipped_interaction_retry skipped_reshared_external_link skipped_out_of_network
      ].map { |key| "#{key}=#{result[key]}" }.join(", ")

      if has_failure
        "Home story sync finished with errors: visited=#{result[:stories_visited]}, failed=#{result[:failed]}, #{shared_counters}."
      else
        "Home story sync complete: visited=#{result[:stories_visited]}, #{shared_counters}."
      end
    end
  end

app/jobs/sync_instagram_profile_stories_job.rb

0.0% lines covered

100.0% branches covered

983 relevant lines. 0 lines covered and 983 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "base64"
  2. require "net/http"
  3. require "digest"
  4. require "stringio"
  5. class SyncInstagramProfileStoriesJob < ApplicationJob
  6. queue_as :story_downloads
  7. MAX_INLINE_IMAGE_BYTES = 2 * 1024 * 1024
  8. MAX_INLINE_VIDEO_BYTES = 10 * 1024 * 1024
  9. MAX_STORIES = 10
  10. MAX_PREVIEW_IMAGE_BYTES = 3 * 1024 * 1024
  # Syncs the current stories of one profile: fetches the story dataset,
  # records upload/view/download events, downloads (or reuses) media, ingests
  # and analyzes each story and, when enabled, queues story replies.
  #
  # Errors raised while handling a single story are collected in
  # +story_failures+ and logged; they do not abort the remaining stories.
  #
  # @param instagram_account_id [Integer]
  # @param instagram_profile_id [Integer] looked up within the account
  # @param profile_action_log_id [Integer, nil] existing action log to reuse, if any
  # @param max_stories [#to_i] clamped to 1..MAX_STORIES
  # @param force_analyze_all [Object] boolean-cast; reprocess stories already processed
  # @param auto_reply [Object] boolean-cast; evaluate and queue replies after analysis
  # @param require_auto_reply_tag [Object] boolean-cast; skip the whole run unless
  #   automatic_reply_enabled?(profile) is true
  # @raise [StandardError] re-raised after logging, broadcasting an alert and
  #   marking the action log failed
  def perform(instagram_account_id:, instagram_profile_id:, profile_action_log_id: nil, max_stories: MAX_STORIES, force_analyze_all: false, auto_reply: false, require_auto_reply_tag: false)
    account = InstagramAccount.find(instagram_account_id)
    profile = account.instagram_profiles.find(instagram_profile_id)
    boolean = ActiveModel::Type::Boolean.new
    max_stories_i = max_stories.to_i.clamp(1, MAX_STORIES)
    force = boolean.cast(force_analyze_all)
    auto_reply_enabled = boolean.cast(auto_reply)
    # Cast like the other flags so serialized values such as "false" are not truthy.
    tag_required = boolean.cast(require_auto_reply_tag)
    action_log = find_or_create_action_log(
      account: account,
      profile: profile,
      action: auto_reply_enabled ? "auto_story_reply" : "sync_stories",
      profile_action_log_id: profile_action_log_id
    )
    tagged_for_auto_reply = automatic_reply_enabled?(profile)
    if tag_required && !tagged_for_auto_reply
      action_log.mark_succeeded!(log_text: "Skipped: automatic_reply tag not present", extra_metadata: { skipped: true, reason: "missing_automatic_reply_tag" })
      return
    end
    action_log.mark_running!(extra_metadata: {
      queue_name: queue_name,
      active_job_id: job_id,
      max_stories: max_stories_i,
      force_analyze_all: force,
      auto_reply: auto_reply_enabled
    })
    Ops::StructuredLogger.info(
      event: "profile_story_sync.started",
      payload: {
        active_job_id: job_id,
        instagram_account_id: account.id,
        instagram_profile_id: profile.id,
        profile_username: profile.username,
        max_stories: max_stories_i,
        force_analyze_all: force,
        auto_reply: auto_reply_enabled
      }
    )
    dataset = Instagram::Client.new(account: account).fetch_profile_story_dataset!(
      username: profile.username,
      stories_limit: max_stories_i
    )
    sync_profile_snapshot!(profile: profile, details: dataset[:profile] || {})
    stories = Array(dataset[:stories]).first(max_stories_i)
    downloaded_count = 0
    reused_download_count = 0
    analyzed_count = 0
    reply_queued_count = 0
    story_failures = []
    # The index comes from the iteration itself instead of a per-story
    # find_index lookup, which was quadratic and returned the first *equal*
    # entry rather than the actual position.
    stories.each_with_index do |story, story_index|
      story_id = story[:story_id].to_s
      next if story_id.blank?
      # Capture HTML snapshot for debugging story skipping
      capture_story_html_snapshot(profile: profile, story: story, story_index: story_index)
      if story[:api_should_skip]
        skip_reason = story[:api_external_profile_reason].to_s.presence || "api_external_profile_indicator"
        profile.record_event!(
          kind: "story_skipped_debug",
          external_id: "story_skipped_debug:#{story_id}:#{Time.current.utc.iso8601(6)}",
          occurred_at: Time.current,
          metadata: base_story_metadata(profile: profile, story: story).merge(
            skip_reason: skip_reason,
            skip_source: "api_story_item_attribution",
            skip_targets: Array(story[:api_external_profile_targets]),
            duplicate_download_prevented: latest_story_download_event(profile: profile, story_id: story_id).present?
          )
        )
        skipped_download = download_skipped_story!(
          account: account,
          profile: profile,
          story: story,
          skip_reason: skip_reason
        )
        downloaded_count += 1 if skipped_download[:downloaded]
        reused_download_count += 1 if skipped_download[:reused]
        next
      end
      already_processed = already_processed_story?(profile: profile, story_id: story_id)
      if already_processed && !force
        profile.record_event!(
          kind: "story_skipped_debug",
          external_id: "story_skipped_debug:#{story_id}:#{Time.current.utc.iso8601(6)}",
          occurred_at: Time.current,
          metadata: base_story_metadata(profile: profile, story: story).merge(
            skip_reason: "already_processed",
            force_analyze_all: force,
            story_index: story_index,
            total_stories: stories.size,
            duplicate_download_prevented: latest_story_download_event(profile: profile, story_id: story_id).present?
          )
        )
        next
      end
      upload_event = profile.record_event!(
        kind: "story_uploaded",
        external_id: "story_uploaded:#{story_id}",
        occurred_at: story[:taken_at],
        metadata: base_story_metadata(profile: profile, story: story)
      )
      viewed_at = Time.current
      profile.update!(last_story_seen_at: viewed_at)
      profile.recompute_last_active!
      profile.save!
      profile.record_event!(
        kind: "story_viewed",
        external_id: "story_viewed:#{story_id}:#{viewed_at.utc.iso8601(6)}",
        occurred_at: viewed_at,
        metadata: base_story_metadata(profile: profile, story: story).merge(viewed_at: viewed_at.iso8601)
      )
      media_url = story[:media_url].to_s.strip
      next if media_url.blank?
      # Prefer media already stored for this story over downloading it again.
      existing_download_event = latest_story_download_event(profile: profile, story_id: story_id)
      reused_media = load_existing_story_media(event: existing_download_event)
      reused_media ||= load_cached_story_media_for_profile(
        account: account,
        profile: profile,
        story: story
      )
      if reused_media
        bytes = reused_media[:bytes]
        content_type = reused_media[:content_type]
        filename = reused_media[:filename]
        downloaded_event = reused_media[:event]
        reused_download_count += 1
      else
        bytes, content_type, filename = download_story_media(url: media_url, user_agent: account.user_agent)
        downloaded_at = Time.current
        downloaded_event = profile.record_event!(
          kind: "story_downloaded",
          external_id: "story_downloaded:#{story_id}:#{downloaded_at.utc.iso8601(6)}",
          occurred_at: downloaded_at,
          metadata: base_story_metadata(profile: profile, story: story).merge(
            downloaded_at: downloaded_at.iso8601,
            media_filename: filename,
            media_content_type: content_type,
            media_bytes: bytes.bytesize
          )
        )
        downloaded_event.media.attach(io: StringIO.new(bytes), filename: filename, content_type: content_type)
        InstagramProfileEvent.broadcast_story_archive_refresh!(account: account)
        downloaded_count += 1
      end
      attach_media_to_event(upload_event, bytes: bytes, filename: filename, content_type: content_type)
      ensure_story_preview_image!(
        event: downloaded_event,
        story: story,
        media_bytes: bytes,
        media_content_type: content_type,
        user_agent: account.user_agent
      )
      ingested_story = ingest_story_for_processing(
        account: account,
        profile: profile,
        story: story,
        downloaded_event: downloaded_event,
        bytes: bytes,
        content_type: content_type,
        filename: filename,
        force_reprocess: force
      )
      analysis = analyze_story_for_comments(
        account: account,
        profile: profile,
        story: story,
        analyzable: downloaded_event,
        media_fingerprint: media_fingerprint_for_story(story: story, bytes: bytes, content_type: content_type),
        bytes: bytes,
        content_type: content_type
      )
      next unless analysis[:ok]
      analyzed_at = Time.current
      profile.record_event!(
        kind: "story_analyzed",
        external_id: "story_analyzed:#{story_id}:#{analyzed_at.utc.iso8601(6)}",
        occurred_at: analyzed_at,
        metadata: base_story_metadata(profile: profile, story: story).merge(
          analyzed_at: analyzed_at.iso8601,
          ai_provider: analysis[:provider],
          ai_model: analysis[:model],
          ai_image_description: analysis[:image_description],
          ai_comment_suggestions: analysis[:comment_suggestions],
          instagram_story_id: ingested_story&.id
        )
      )
      analyzed_count += 1
      if auto_reply_enabled
        decision = story_reply_decision(analysis: analysis, profile: profile, story_id: story_id)
        if decision[:queue]
          queued = queue_story_reply!(
            account: account,
            profile: profile,
            story: story,
            analysis: analysis,
            downloaded_event: downloaded_event
          )
          reply_queued_count += 1 if queued
        else
          profile.record_event!(
            kind: "story_reply_skipped",
            external_id: "story_reply_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
            occurred_at: Time.current,
            metadata: base_story_metadata(profile: profile, story: story).merge(
              skip_reason: decision[:reason],
              relevant: analysis[:relevant],
              author_type: analysis[:author_type],
              suggestions_count: Array(analysis[:comment_suggestions]).length
            )
          )
        end
      end
    rescue StandardError => e
      # Per-story failure: remember it and carry on with the next story.
      story_failures << {
        story_id: story_id.presence || story[:story_id].to_s,
        error_class: e.class.name,
        error_message: e.message.to_s.byteslice(0, 220)
      }
      Ops::StructuredLogger.warn(
        event: "profile_story_sync.story_failed",
        payload: {
          active_job_id: job_id,
          instagram_account_id: account.id,
          instagram_profile_id: profile.id,
          profile_username: profile.username,
          story_id: story_id.presence || story[:story_id].to_s,
          error_class: e.class.name,
          error_message: e.message.to_s
        }
      )
      next
    end
    Ops::StructuredLogger.info(
      event: "profile_story_sync.completed",
      payload: {
        active_job_id: job_id,
        instagram_account_id: account.id,
        instagram_profile_id: profile.id,
        profile_username: profile.username,
        stories_found: stories.size,
        downloaded: downloaded_count,
        reused_downloads: reused_download_count,
        analyzed: analyzed_count,
        replies_queued: reply_queued_count,
        failed_story_count: story_failures.length
      }
    )
    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: { kind: "notice", message: "Story sync completed for #{profile.username}. Stories: #{stories.size}, downloaded: #{downloaded_count}, reused: #{reused_download_count}, analyzed: #{analyzed_count}, replies queued: #{reply_queued_count}, failed: #{story_failures.length}." }
    )
    action_log.mark_succeeded!(
      extra_metadata: {
        stories_found: stories.size,
        downloaded: downloaded_count,
        reused_downloads: reused_download_count,
        analyzed: analyzed_count,
        replies_queued: reply_queued_count,
        failed_story_count: story_failures.length,
        failed_stories: story_failures.first(15)
      },
      log_text: "Synced #{stories.size} stories (downloaded: #{downloaded_count}, reused: #{reused_download_count}, analyzed: #{analyzed_count}, replies queued: #{reply_queued_count}, failed: #{story_failures.length})"
    )
  rescue StandardError => e
    # account/profile/action_log may still be nil if the failure was early.
    Ops::StructuredLogger.error(
      event: "profile_story_sync.failed",
      payload: {
        active_job_id: job_id,
        instagram_account_id: account&.id,
        instagram_profile_id: profile&.id,
        profile_username: profile&.username,
        error_class: e.class.name,
        error_message: e.message.to_s
      }
    )
    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: { kind: "alert", message: "Story sync failed: #{e.message}" }
    ) if account
    action_log&.mark_failed!(error_message: e.message, extra_metadata: { active_job_id: job_id })
    raise
  end
  292. private
  # Resolves the action log this job run reports against.
  #
  # When +profile_action_log_id+ is present, the log is looked up among the
  # profile's action logs; if it is absent or no such row exists, a new
  # "queued" log is created carrying this job's id and queue name.
  def find_or_create_action_log(account:, profile:, action:, profile_action_log_id:)
    if profile_action_log_id.present?
      existing = profile.instagram_profile_action_logs.find_by(id: profile_action_log_id)
      return existing if existing
    end

    new_log_attributes = {
      instagram_account: account,
      action: action,
      status: "queued",
      trigger_source: "job",
      occurred_at: Time.current,
      active_job_id: job_id,
      queue_name: queue_name,
      metadata: { created_by: self.class.name }
    }
    profile.instagram_profile_action_logs.create!(new_log_attributes)
  end
  # Copies freshly fetched profile details onto the profile record.
  #
  # For each synced attribute the value from +details+ wins when it is
  # present; otherwise the profile keeps its current value. Afterwards the
  # profile recomputes its last-active marker and is saved.
  def sync_profile_snapshot!(profile:, details:)
    synced_attributes = %i[display_name profile_pic_url ig_user_id bio last_post_at]
    updates = synced_attributes.to_h do |attribute|
      [ attribute, details[attribute].presence || profile.public_send(attribute) ]
    end

    profile.update!(updates)
    profile.recompute_last_active!
    profile.save!
  end
  # Builds the metadata hash shared by every story-related event.
  #
  # Story attributes are copied verbatim from +story+; carousel entries are
  # compacted via #compact_story_media_variants, timestamps are rendered as
  # ISO-8601 strings, and a small +profile_context+ snapshot is appended.
  def base_story_metadata(profile:, story:)
    copied_before_media = %i[
      story_id media_type media_url image_url video_url
      primary_media_source primary_media_index
    ]
    copied_after_media = %i[
      can_reply can_reshare owner_user_id owner_username
      api_has_external_profile_indicator api_external_profile_reason
      api_external_profile_targets api_should_skip caption permalink
    ]

    metadata = { source: "instagram_story_reel_api" }
    copied_before_media.each { |key| metadata[key] = story[key] }
    metadata[:media_variants_count] = Array(story[:media_variants]).length
    metadata[:carousel_media] = compact_story_media_variants(story[:carousel_media])
    copied_after_media.each { |key| metadata[key] = story[key] }
    metadata[:upload_time] = story[:taken_at]&.iso8601
    metadata[:expiring_at] = story[:expiring_at]&.iso8601
    metadata[:profile_context] = {
      username: profile.username,
      display_name: profile.display_name,
      can_message: profile.can_message,
      tags: profile.profile_tags.pluck(:name).sort,
      # Bio is flattened to one line and capped at 260 bytes.
      bio: profile.bio.to_s.tr("\n", " ").byteslice(0, 260)
    }
    metadata
  end
  # Reduces up to eight media variants to a compact, nil-free hash each.
  #
  # Entries may use symbol or string keys; non-Hash entries and entries
  # without a media URL are dropped. Any error yields an empty array.
  def compact_story_media_variants(variants)
    Array(variants).first(8).filter_map do |entry|
      attrs = entry.is_a?(Hash) ? entry : {}
      raw = ->(key) { attrs[key] || attrs[key.to_s] }
      text = ->(key) { raw.call(key).to_s.presence }

      url = raw.call(:media_url)
      next if url.to_s.blank?

      {
        source: text.call(:source),
        index: raw.call(:index),
        media_pk: text.call(:media_pk),
        media_type: text.call(:media_type),
        media_url: url.to_s,
        image_url: text.call(:image_url),
        video_url: text.call(:video_url),
        width: raw.call(:width),
        height: raw.call(:height)
      }.compact
    end
  rescue StandardError
    []
  end
  # True when the profile carries either spelling of the automatic-reply tag.
  def automatic_reply_enabled?(profile)
    auto_reply_tag_names = [ "automatic_reply", "automatic reply" ]
    profile.profile_tags.where(name: auto_reply_tag_names).exists?
  end
  # True when a "story_uploaded" event already exists for +story_id+ on the profile.
  def already_processed_story?(profile:, story_id:)
    upload_marker = "story_uploaded:#{story_id}"
    profile.instagram_profile_events.where(kind: "story_uploaded", external_id: upload_marker).exists?
  end
  # Best-effort attachment of raw media bytes to an event.
  #
  # Does nothing when the event is missing or already has media; any error
  # raised while attaching is swallowed and nil is returned.
  def attach_media_to_event(event, bytes:, filename:, content_type:)
    return if !event || event.media.attached?

    upload = { io: StringIO.new(bytes), filename: filename, content_type: content_type }
    event.media.attach(upload)
  rescue StandardError
    nil
  end
  # Runs the AI analysis for a story and normalises the result.
  #
  # Returns +{ ok: false }+ when the runner raises or yields no analysis
  # hash; otherwise a hash with provider/model info, relevance, author type,
  # image description and at most eight comment suggestions.
  def analyze_story_for_comments(account:, profile:, story:, analyzable:, media_fingerprint:, bytes:, content_type:)
    media = build_media_payload(story: story, bytes: bytes, content_type: content_type)
    context = build_story_payload(profile: profile, story: story)

    outcome = Ai::Runner.new(account: account).analyze!(
      purpose: "post",
      analyzable: analyzable,
      payload: context,
      media: media,
      media_fingerprint: media_fingerprint
    )

    findings = outcome.dig(:result, :analysis)
    return { ok: false } unless findings.is_a?(Hash)

    suggestions = Array(findings["comment_suggestions"]).first(8)
    {
      ok: true,
      provider: outcome[:provider].key,
      model: outcome.dig(:result, :model),
      relevant: findings["relevant"],
      author_type: findings["author_type"],
      image_description: findings["image_description"].to_s.presence,
      comment_suggestions: suggestions
    }
  rescue StandardError
    { ok: false }
  end
  # Computes a SHA-256 fingerprint identifying a story's media.
  #
  # The downloaded bytes are hashed when available. Otherwise the first
  # present media URL (media, image, then video) is hashed. Returns nil when
  # neither bytes nor a URL exist.
  #
  # The content type is deliberately NOT used as a fallback: hashing e.g.
  # "image/jpeg" alone would give every URL-less story the same fingerprint.
  # +content_type+ stays in the signature for caller compatibility.
  def media_fingerprint_for_story(story:, bytes:, content_type:)
    return Digest::SHA256.hexdigest(bytes) if bytes.present?

    url = %i[media_url image_url video_url]
      .map { |key| story[key].to_s }
      .find(&:present?)
    return nil if url.blank?

    Digest::SHA256.hexdigest(url)
  end
  # Makes sure a video story event has a preview image attached.
  #
  # Returns false when the event has no media or the media is not a video,
  # true when a preview already exists or was attached here. A remote preview
  # image is tried first; if none could be downloaded, the first video frame
  # is extracted via VideoThumbnailService. Errors are logged and yield false.
  def ensure_story_preview_image!(event:, story:, media_bytes:, media_content_type:, user_agent:)
    return false unless event&.media&.attached?
    return false unless event.media.blob&.content_type.to_s.start_with?("video/")
    return true if event.preview_image.attached?

    remote_url = preferred_story_preview_url(story: story)
    remote_image = download_preview_image(url: remote_url, user_agent: user_agent) if remote_url.present?
    if remote_image
      attach_preview_image_bytes!(
        event: event,
        image_bytes: remote_image[:bytes],
        content_type: remote_image[:content_type],
        filename: remote_image[:filename]
      )
      stamp_story_preview_metadata!(event: event, source: "remote_image_url")
      return true
    end

    frame = VideoThumbnailService.new.extract_first_frame(
      video_bytes: media_bytes.to_s.b,
      reference_id: "story_event_#{event.id}",
      content_type: media_content_type
    )
    return false unless frame[:ok]

    attach_preview_image_bytes!(
      event: event,
      image_bytes: frame[:image_bytes],
      content_type: frame[:content_type],
      filename: frame[:filename]
    )
    stamp_story_preview_metadata!(event: event, source: "ffmpeg_first_frame")
    true
  rescue StandardError => e
    Rails.logger.warn("[SyncInstagramProfileStoriesJob] preview attach failed event_id=#{event&.id}: #{e.class}: #{e.message}")
    false
  end
  # Picks the first non-blank still-image URL usable as a preview.
  #
  # Story-level URLs are considered first (image, thumbnail, preview image),
  # then each carousel entry's image URL under symbol and string keys.
  # Returns nil when nothing is found or an error occurs.
  def preferred_story_preview_url(story:)
    story_level = %i[image_url thumbnail_url preview_image_url].map { |key| story[key].to_s }
    carousel_level = Array(story[:carousel_media]).flat_map do |entry|
      attrs = entry.is_a?(Hash) ? entry : {}
      [ attrs[:image_url].to_s, attrs["image_url"].to_s ]
    end

    (story_level + carousel_level).map(&:strip).find(&:present?)
  rescue StandardError
    nil
  end
  # Downloads a preview image over HTTP(S), following a bounded number of redirects.
  #
  # Returns a hash with :bytes, :content_type and :filename, or nil when the
  # URL is not HTTP(S), the redirect budget is exhausted, the response is not
  # a success, the body is empty, oversized or HTML, the content type is not
  # an image, the signature check fails, or any error is raised.
  def download_preview_image(url:, user_agent:, redirects_left: 3)
    target = URI.parse(url)
    return nil unless target.is_a?(URI::HTTP) || target.is_a?(URI::HTTPS)

    connection = Net::HTTP.new(target.host, target.port).tap do |http|
      http.use_ssl = (target.scheme == "https")
      http.open_timeout = 8
      http.read_timeout = 20
    end
    request = Net::HTTP::Get.new(target.request_uri).tap do |get|
      get["Accept"] = "image/*,*/*;q=0.8"
      get["User-Agent"] = user_agent.to_s.presence || "Mozilla/5.0"
      get["Referer"] = Instagram::Client::INSTAGRAM_BASE_URL
    end
    response = connection.request(request)

    if response.is_a?(Net::HTTPRedirection) && response["location"].present?
      return nil unless redirects_left.to_i.positive?

      next_url = normalize_redirect_url(base_uri: target, location: response["location"])
      return nil if next_url.blank?

      return download_preview_image(url: next_url, user_agent: user_agent, redirects_left: redirects_left.to_i - 1)
    end
    return nil unless response.is_a?(Net::HTTPSuccess)

    payload = response.body.to_s.b
    return nil if payload.empty? || payload.bytesize > MAX_PREVIEW_IMAGE_BYTES
    return nil if html_payload?(payload)

    mime = response["content-type"].to_s.split(";").first.to_s
    return nil unless mime.start_with?("image/")

    # Raises on a magic-byte mismatch; the method-level rescue turns that into nil.
    validate_known_signature!(body: payload, content_type: mime)
    extension = extension_for_content_type(content_type: mime)

    {
      bytes: payload,
      content_type: mime,
      filename: "story_preview_#{Digest::SHA256.hexdigest(url)[0, 12]}.#{extension}"
    }
  rescue StandardError
    nil
  end
  # Uploads preview image bytes as a new blob and attaches it to the event.
  #
  # The content type defaults to "image/jpeg" when blank; blob identification
  # is disabled so the supplied content type is kept.
  def attach_preview_image_bytes!(event:, image_bytes:, content_type:, filename:)
    resolved_type = content_type.to_s.presence || "image/jpeg"
    preview_blob = ActiveStorage::Blob.create_and_upload!(
      io: StringIO.new(image_bytes),
      filename: filename,
      content_type: resolved_type,
      identify: false
    )

    attach_preview_blob_to_event!(event: event, blob: preview_blob)
  end
  # Points the event's preview image at +blob+.
  #
  # An existing attachment row is re-pointed (only when it references a
  # different blob); otherwise the blob is attached fresh. No-op without a blob.
  def attach_preview_blob_to_event!(event:, blob:)
    return unless blob

    unless event.preview_image.attached? && event.preview_image.attachment.present?
      return event.preview_image.attach(blob)
    end

    current = event.preview_image.attachment
    current.update!(blob: blob) unless current.blob_id == blob.id
    nil
  end
  # Records on the event's metadata that a preview image was attached and
  # where it came from. Failures are swallowed and return nil.
  def stamp_story_preview_metadata!(event:, source:)
    existing = event.metadata.is_a?(Hash) ? event.metadata.deep_dup : {}
    stamped = existing.merge(
      "preview_image_status" => "attached",
      "preview_image_source" => source.to_s,
      "preview_image_attached_at" => Time.current.utc.iso8601(3)
    )

    event.update!(metadata: stamped)
  rescue StandardError
    nil
  end
  # Assembles the analysis payload sent to the AI runner for a story.
  #
  # The payload has three parts: the story presented as a +post+, the
  # +author_profile+ with recent posts, events, story history and narrative
  # text, and the fixed +rules+ for reply suggestions.
  def build_story_payload(profile:, story:)
    story_history = recent_story_history_context(profile: profile)
    narrative_text = profile.history_narrative_text(max_chunks: 3)
    narrative_chunks = profile.history_narrative_chunks(max_chunks: 6)

    recent_posts = profile.instagram_profile_posts.recent_first.limit(5).map do |post|
      post_analysis = post.analysis.is_a?(Hash) ? post.analysis : nil
      {
        shortcode: post.shortcode,
        caption: post.caption.to_s,
        taken_at: post.taken_at&.iso8601,
        image_description: post_analysis ? post_analysis["image_description"] : nil,
        topics: post_analysis ? Array(post_analysis["topics"]).first(6) : []
      }
    end

    recent_events = profile.instagram_profile_events
      .order(detected_at: :desc)
      .limit(20)
      .pluck(:kind, :occurred_at)
      .map { |(kind, occurred_at)| { kind: kind, occurred_at: occurred_at&.iso8601 } }

    post_section = {
      shortcode: story[:story_id],
      caption: story[:caption],
      taken_at: story[:taken_at]&.iso8601,
      permalink: story[:permalink],
      likes_count: nil,
      comments_count: nil,
      comments: []
    }
    author_section = {
      username: profile.username,
      display_name: profile.display_name,
      bio: profile.bio,
      can_message: profile.can_message,
      tags: profile.profile_tags.pluck(:name).sort,
      recent_posts: recent_posts,
      recent_profile_events: recent_events,
      recent_story_history: story_history,
      historical_narrative_text: narrative_text,
      historical_narrative_chunks: narrative_chunks
    }
    rules_section = {
      require_manual_review: true,
      style: "gen_z_light",
      context: "story_reply_suggestion",
      only_if_relevant: true,
      diversity_requirement: "Prefer novel comments and avoid repeating previous story replies."
    }

    { post: post_section, author_profile: author_section, rules: rules_section }
  end
  # Decides whether an analysed story should receive an automatic reply.
  #
  # Returns +{ queue:, reason: }+. Checks run in order: reply already sent,
  # messaging not configured, no suggestions, not relevant, author type not
  # in the allow-list.
  def story_reply_decision(analysis:, profile:, story_id:)
    decline = ->(reason) { { queue: false, reason: reason } }

    return decline.call("already_sent") if story_reply_already_sent?(profile: profile, story_id: story_id)
    return decline.call("official_messaging_not_configured") unless official_messaging_service.configured?

    usable_suggestions = Array(analysis[:comment_suggestions]).map(&:to_s).reject(&:blank?)
    return decline.call("no_comment_suggestions") if usable_suggestions.empty?
    return decline.call("not_relevant") unless analysis[:relevant] == true

    author_type = analysis[:author_type].to_s
    unless %w[personal_user friend relative unknown].include?(author_type)
      return decline.call("author_type_#{author_type.presence || 'missing'}_not_allowed")
    end

    { queue: true, reason: "eligible_for_reply" }
  end
  # True when a "story_reply_sent" event exists for +story_id+ on the profile.
  def story_reply_already_sent?(profile:, story_id:)
    reply_marker = "story_reply_sent:#{story_id}"
    profile.instagram_profile_events.where(kind: "story_reply_sent", external_id: reply_marker).exists?
  end
  # Sends an automatic story reply and records it.
  #
  # Picks a suggestion via #select_unique_story_comment, sends it through the
  # messaging service, then stores an outgoing "sent" message, a
  # "story_reply_sent" event and the reply text on the downloaded event.
  #
  # Returns true when every step succeeded, false otherwise (including when
  # no usable suggestion exists). Never raises.
  #
  # Failure handling distinguishes two cases:
  # * the send itself failed -> a "failed" outgoing message is recorded
  #   (best effort; a failure to record it is only logged);
  # * the send succeeded but bookkeeping raised -> no "failed" message is
  #   written, because the reply was in fact delivered; the problem is logged.
  def queue_story_reply!(account:, profile:, story:, analysis:, downloaded_event: nil)
    story_id = story[:story_id].to_s
    delivered = false
    suggestion = select_unique_story_comment(profile: profile, suggestions: Array(analysis[:comment_suggestions]))
    return false if suggestion.blank?

    result = official_messaging_service.send_text!(
      recipient_id: profile.ig_user_id.presence || profile.username,
      text: suggestion,
      context: {
        source: "story_auto_reply",
        story_id: story_id
      }
    )
    delivered = true

    message = account.instagram_messages.create!(
      instagram_profile: profile,
      direction: "outgoing",
      body: suggestion,
      status: "sent",
      sent_at: Time.current
    )
    profile.record_event!(
      kind: "story_reply_sent",
      external_id: "story_reply_sent:#{story_id}",
      occurred_at: Time.current,
      metadata: base_story_metadata(profile: profile, story: story).merge(
        ai_reply_text: suggestion,
        auto_reply: true,
        instagram_message_id: message.id,
        provider_message_id: result[:provider_message_id]
      )
    )
    attach_reply_comment_to_downloaded_event!(downloaded_event: downloaded_event, comment_text: suggestion)
    true
  rescue StandardError => e
    if delivered
      Rails.logger.warn("[SyncInstagramProfileStoriesJob] story reply sent but bookkeeping failed story_id=#{story_id}: #{e.class}: #{e.message}")
    elsif suggestion.present?
      begin
        account.instagram_messages.create!(
          instagram_profile: profile,
          direction: "outgoing",
          body: suggestion.to_s,
          status: "failed",
          error_message: e.message.to_s
        )
      rescue StandardError => record_error
        Rails.logger.warn("[SyncInstagramProfileStoriesJob] could not record failed story reply story_id=#{story_id}: #{record_error.class}: #{record_error.message}")
      end
    end
    false
  end
  # Lazily built, memoised messaging integration service for this job instance.
  def official_messaging_service
    @official_messaging_service = Messaging::IntegrationService.new unless @official_messaging_service
    @official_messaging_service
  end
  # Stores the sent reply text under "reply_comment" in the downloaded
  # event's metadata. No-op when the event or the text is blank.
  def attach_reply_comment_to_downloaded_event!(downloaded_event:, comment_text:)
    return if downloaded_event.blank? || comment_text.blank?

    current = downloaded_event.metadata.is_a?(Hash) ? downloaded_event.metadata.deep_dup : {}
    downloaded_event.update!(metadata: current.merge("reply_comment" => comment_text.to_s))
  end
  # Archives the media of a story that was skipped for processing.
  #
  # Reuses an existing download event or cached media when available;
  # otherwise downloads the media, records a "story_downloaded" event flagged
  # as skipped, attaches the bytes and broadcasts an archive refresh.
  #
  # Returns +{ downloaded:, reused:, event: }+; any error yields all-false/nil.
  def download_skipped_story!(account:, profile:, story:, skip_reason:)
    story_id = story[:story_id].to_s

    prior_download = latest_story_download_event(profile: profile, story_id: story_id)
    return { downloaded: false, reused: true, event: prior_download } if prior_download&.media&.attached?

    cached = load_cached_story_media_for_profile(
      account: account,
      profile: profile,
      story: story,
      skip_reason: skip_reason
    )
    return { downloaded: false, reused: true, event: cached[:event] } if cached

    source_url = story[:media_url].to_s.strip
    return { downloaded: false, reused: false, event: nil } if source_url.blank?

    bytes, content_type, filename = download_story_media(url: source_url, user_agent: account.user_agent)
    fetched_at = Time.current
    download_details = {
      skipped: true,
      skip_reason: skip_reason.to_s,
      downloaded_at: fetched_at.iso8601,
      media_filename: filename,
      media_content_type: content_type,
      media_bytes: bytes.bytesize
    }

    event = profile.record_event!(
      kind: "story_downloaded",
      external_id: "story_downloaded:#{story_id}:#{fetched_at.utc.iso8601(6)}",
      occurred_at: fetched_at,
      metadata: base_story_metadata(profile: profile, story: story).merge(download_details)
    )
    event.media.attach(io: StringIO.new(bytes), filename: filename, content_type: content_type)
    InstagramProfileEvent.broadcast_story_archive_refresh!(account: account)

    { downloaded: true, reused: false, event: event }
  rescue StandardError
    { downloaded: false, reused: false, event: nil }
  end
  # Summarises the profile's 25 most recently detected story analysis/reply
  # events as compact hashes (nil fields removed) for the AI payload.
  def recent_story_history_context(profile:)
    history_kinds = [ "story_analyzed", "story_reply_sent", "story_comment_posted_via_feed" ]
    recent = profile.instagram_profile_events
      .where(kind: history_kinds)
      .order(detected_at: :desc, id: :desc)
      .limit(25)

    recent.map do |event|
      meta = event.metadata.is_a?(Hash) ? event.metadata : {}
      comment = meta["ai_reply_text"].to_s.presence || meta["comment_text"].to_s.presence
      {
        kind: event.kind,
        occurred_at: event.occurred_at&.iso8601 || event.detected_at&.iso8601,
        story_id: meta["story_id"].to_s.presence,
        image_description: meta["ai_image_description"].to_s.presence,
        posted_comment: comment
      }.compact
    end
  end
  # Picks the suggestion least similar to the profile's recent replies.
  #
  # Suggestions are stripped and blanks removed; nil is returned when none
  # remain. With no reply history the first suggestion wins. Otherwise each
  # candidate is scored by its highest #text_similarity against the last 40
  # replies/comments and the lowest-scoring candidate is returned; ties go
  # to the earliest suggestion.
  #
  # The previous implementation sorted by that score and then searched for
  # the first candidate under a 0.72 threshold, falling back to the first
  # sorted one. Because the list was sorted ascending, both paths always
  # returned the first sorted element, so the threshold had no effect;
  # +min_by+ expresses the same choice directly and without a full sort.
  def select_unique_story_comment(profile:, suggestions:)
    candidates = Array(suggestions).map { |suggestion| suggestion.to_s.strip }.reject(&:blank?)
    return nil if candidates.empty?

    history = profile.instagram_profile_events
      .where(kind: [ "story_reply_sent", "story_comment_posted_via_feed" ])
      .order(detected_at: :desc, id: :desc)
      .limit(40)
      .map { |e| e.metadata.is_a?(Hash) ? (e.metadata["ai_reply_text"].to_s.presence || e.metadata["comment_text"].to_s) : "" }
      .reject(&:blank?)
    return candidates.first if history.empty?

    candidates.min_by do |candidate|
      history.map { |past| text_similarity(candidate, past) }.max.to_f
    end
  end
  # Token-overlap similarity in 0.0..1.0: shared unique tokens divided by
  # the size of the larger token set. Returns 0.0 when either side has none.
  def text_similarity(a, b)
    token_sets = [ tokenize(a), tokenize(b) ]
    return 0.0 if token_sets.any?(&:empty?)

    shared = token_sets.first & token_sets.last
    shared.length.fdiv(token_sets.map(&:length).max)
  end
  # Splits text into unique, lower-cased alphanumeric tokens.
  #
  # Uses the Unicode-aware +[[:alnum:]]+ class so words in non-ASCII scripts
  # (accented Latin, Cyrillic, ...) become tokens too; the former +[a-z0-9]+
  # pattern dropped them entirely, making such texts look dissimilar to
  # everything. Invalid byte sequences are scrubbed first so malformed input
  # cannot raise. nil yields an empty array.
  def tokenize(text)
    text.to_s.scrub.downcase.scan(/[[:alnum:]]+/).uniq
  end
  # Builds the media section handed to the AI runner.
  #
  # Videos carry their bytes only when within MAX_INLINE_VIDEO_BYTES (nil
  # otherwise). Anything else is treated as an image: bytes are always
  # included, plus a base64 data URL when within MAX_INLINE_IMAGE_BYTES.
  def build_media_payload(story:, bytes:, content_type:)
    if story[:media_type].to_s == "video"
      inline_bytes = bytes.bytesize > MAX_INLINE_VIDEO_BYTES ? nil : bytes
      return { type: "video", content_type: content_type, bytes: inline_bytes }
    end

    { type: "image", content_type: content_type, bytes: bytes }.tap do |image|
      next if bytes.bytesize > MAX_INLINE_IMAGE_BYTES

      image[:image_data_url] = "data:#{content_type};base64,#{Base64.strict_encode64(bytes)}"
    end
  end
  # Downloads story media over HTTP(S).
  #
  # Returns +[bytes, content_type, filename]+. Raises a RuntimeError for a
  # non-HTTP(S) URL, a non-success response, an empty body, an unusable
  # redirect target, or when the redirect budget is exhausted.
  #
  # +redirects_left+ bounds redirect following so a redirect loop can no
  # longer recurse without limit. Redirect targets are resolved against the
  # current URL via #normalize_redirect_url, so relative +Location+ headers
  # work, mirroring #download_preview_image.
  def download_story_media(url:, user_agent:, redirects_left: 5)
    uri = URI.parse(url)
    raise "Invalid story media URL" unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)

    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = (uri.scheme == "https")
    http.open_timeout = 10
    http.read_timeout = 30

    req = Net::HTTP::Get.new(uri.request_uri)
    req["User-Agent"] = user_agent.presence || "Mozilla/5.0"
    req["Accept"] = "*/*"
    req["Referer"] = "https://www.instagram.com/"
    res = http.request(req)

    if res.is_a?(Net::HTTPRedirection) && res["location"].present?
      raise "Too many story media redirects" if redirects_left.to_i <= 0

      redirected_url = normalize_redirect_url(base_uri: uri, location: res["location"])
      raise "Invalid story media redirect" if redirected_url.blank?

      return download_story_media(url: redirected_url, user_agent: user_agent, redirects_left: redirects_left.to_i - 1)
    end
    raise "HTTP #{res.code}" unless res.is_a?(Net::HTTPSuccess)

    bytes = res.body.to_s
    raise "Empty story media body" if bytes.blank?

    content_type = res["content-type"].to_s.split(";").first.presence || "application/octet-stream"
    ext = extension_for_content_type(content_type: content_type)
    # Filename is derived from the URL path and body size, not the body content.
    digest = Digest::SHA256.hexdigest("#{uri.path}-#{bytes.bytesize}")[0, 12]
    filename = "story_#{digest}.#{ext}"
    [ bytes, content_type, filename ]
  end
  # Maps a MIME content type to a file extension ("bin" when unknown).
  #
  # Matching is case-insensitive and nil-safe. GIF is recognised so that GIF
  # images accepted by #validate_known_signature! no longer end up as ".bin".
  def extension_for_content_type(content_type:)
    type = content_type.to_s.downcase
    # Checked in insertion order; the first marker contained in the type wins.
    extensions = {
      "jpeg" => "jpg",
      "png" => "png",
      "webp" => "webp",
      "gif" => "gif",
      "mp4" => "mp4",
      "quicktime" => "mov"
    }
    _marker, extension = extensions.find { |marker, _| type.include?(marker) }
    extension || "bin"
  end
  # Resolves a redirect +Location+ against the URL that produced it.
  #
  # Returns the absolute URL as a string, or nil when the result is not
  # HTTP(S) or the location cannot be parsed.
  def normalize_redirect_url(base_uri:, location:)
    absolute = URI.join(base_uri.to_s, location.to_s).to_s
    resolved = URI.parse(absolute)
    http_like = [ URI::HTTP, URI::HTTPS ].any? { |klass| resolved.is_a?(klass) }

    http_like ? resolved.to_s : nil
  rescue URI::InvalidURIError, ArgumentError
    nil
  end
  # Heuristic: true when the first 4096 bytes of +body+ look like an HTML
  # document (case-insensitive "<html" anywhere, or a leading HTML doctype).
  def html_payload?(body)
    head = body.to_s.byteslice(0, 4096).to_s.downcase
    return true if head.start_with?("<!doctype html")

    head.include?("<html")
  end
  # Verifies that +body+ starts with the magic bytes expected for its content type.
  #
  # Raises RuntimeError on a mismatch for JPEG, PNG, GIF, WebP and video
  # types (ISO base media "ftyp" box at offset 4). Blank, octet-stream and
  # unrecognised types are not checked.
  #
  # The body is normalised to a binary string first: comparing a UTF-8
  # tagged body containing high bytes with the binary signatures would raise
  # Encoding::CompatibilityError, and a nil body would raise NoMethodError.
  def validate_known_signature!(body:, content_type:)
    type = content_type.to_s.downcase
    return if type.strip.empty?
    return if type.include?("octet-stream")

    bytes = body.to_s.b
    if type.include?("jpeg")
      raise "invalid jpeg signature" unless bytes.start_with?("\xFF\xD8".b)
    elsif type.include?("png")
      raise "invalid png signature" unless bytes.start_with?("\x89PNG\r\n\x1A\n".b)
    elsif type.include?("gif")
      raise "invalid gif signature" unless bytes.start_with?("GIF87a".b, "GIF89a".b)
    elsif type.include?("webp")
      raise "invalid webp signature" unless bytes.bytesize >= 12 && bytes.byteslice(0, 4) == "RIFF" && bytes.byteslice(8, 4) == "WEBP"
    elsif type.start_with?("video/")
      raise "invalid video signature" unless bytes.bytesize >= 12 && bytes.byteslice(4, 4) == "ftyp"
    end
  end
  # Hands a downloaded story to StoryIngestionService.
  #
  # Returns the service's result, or nil (after logging a warning) when
  # ingestion raises.
  def ingest_story_for_processing(account:, profile:, story:, downloaded_event:, bytes:, content_type:, filename:, force_reprocess:)
    ingestion = StoryIngestionService.new(account: account, profile: profile)
    ingestion.ingest!(
      story: story,
      source_event: downloaded_event,
      bytes: bytes,
      content_type: content_type,
      filename: filename,
      force_reprocess: force_reprocess
    )
  rescue StandardError => e
    Rails.logger.warn("[SyncInstagramProfileStoriesJob] story ingestion failed story_id=#{story[:story_id]}: #{e.class}: #{e.message}")
    nil
  end
  # Most recently detected "story_downloaded" event with attached media for
  # the given story on this profile, or nil.
  def latest_story_download_event(profile:, story_id:)
    id_pattern = "story_downloaded:#{story_id}:%"
    downloads_with_media = profile.instagram_profile_events
      .joins(:media_attachment)
      .with_attached_media
      .where(kind: "story_downloaded")
      .where("external_id LIKE ?", id_pattern)

    downloads_with_media.order(detected_at: :desc, id: :desc).first
  end
  # Loads the media already attached to +event+.
  #
  # Returns +{ event:, bytes:, content_type:, filename: }+, or nil when the
  # event has no media or the blob cannot be read.
  def load_existing_story_media(event:)
    return nil unless event&.media&.attached?

    stored = event.media.blob
    resolved_type = stored.content_type.to_s.presence || "application/octet-stream"
    resolved_name = stored.filename.to_s.presence || "story_#{event.id}.bin"

    { event: event, bytes: stored.download, content_type: resolved_type, filename: resolved_name }
  rescue StandardError
    nil
  end
  # Reuses media for this story that was already stored under another profile.
  #
  # On a cache hit a new "story_downloaded" event is recorded for this
  # profile pointing at the cached blob, and its media is loaded. Returns
  # nil when the story id is blank, nothing is cached, or an error occurs
  # (logged as a warning).
  def load_cached_story_media_for_profile(account:, profile:, story:, skip_reason: nil)
    story_id = story[:story_id].to_s.strip
    return nil if story_id.blank?

    hit = find_cached_story_media(story_id: story_id, excluding_profile_id: profile.id)
    return nil unless hit

    reuse_event = build_cached_story_download_event(
      account: account,
      profile: profile,
      story: story,
      story_id: story_id,
      blob: hit[:blob],
      cache_source: hit[:source],
      cache_source_id: hit[:source_id],
      skip_reason: skip_reason
    )
    reuse_event ? load_existing_story_media(event: reuse_event) : nil
  rescue StandardError => e
    Rails.logger.warn("[SyncInstagramProfileStoriesJob] cached media reuse failed for story_id=#{story_id}: #{e.class}: #{e.message}")
    nil
  end
  # Looks for already stored media of +story_id+ belonging to a different profile.
  #
  # InstagramStory records are checked first, then "story_downloaded"
  # profile events. Returns +{ blob:, source:, source_id: }+ or nil.
  def find_cached_story_media(story_id:, excluding_profile_id:)
    story_record = InstagramStory
      .joins(:media_attachment)
      .where(story_id: story_id)
      .where.not(instagram_profile_id: excluding_profile_id)
      .order(taken_at: :desc, id: :desc)
      .first
    if story_record&.media&.attached?
      return { blob: story_record.media.blob, source: "instagram_story", source_id: story_record.id }
    end

    id_pattern = "story_downloaded:#{story_id}:%"
    download_event = InstagramProfileEvent
      .joins(:media_attachment)
      .with_attached_media
      .where(kind: "story_downloaded")
      .where.not(instagram_profile_id: excluding_profile_id)
      .where("external_id LIKE ?", id_pattern)
      .order(detected_at: :desc, id: :desc)
      .first
    if download_event&.media&.attached?
      { blob: download_event.media.blob, source: "instagram_profile_event", source_id: download_event.id }
    end
  end
  # Records a "story_downloaded" event for this profile that reuses a cached blob.
  #
  # The metadata notes the cache source; when a skip reason is given the
  # event is also flagged as skipped. The blob is attached unless the event
  # already has media, and an archive refresh is broadcast.
  def build_cached_story_download_event(account:, profile:, story:, story_id:, blob:, cache_source:, cache_source_id:, skip_reason: nil)
    reused_at = Time.current
    details = base_story_metadata(profile: profile, story: story).merge(
      downloaded_at: reused_at.iso8601,
      media_filename: blob.filename.to_s,
      media_content_type: blob.content_type.to_s,
      media_bytes: blob.byte_size.to_i,
      reused_local_cache: true,
      reused_local_cache_source: cache_source.to_s,
      reused_local_cache_source_id: cache_source_id
    )
    if skip_reason.present?
      details[:skip_reason] = skip_reason.to_s
      details[:skipped] = true
    end

    profile.record_event!(
      kind: "story_downloaded",
      external_id: "story_downloaded:#{story_id}:#{reused_at.utc.iso8601(6)}",
      occurred_at: reused_at,
      metadata: details
    ).tap do |event|
      event.media.attach(blob) unless event.media.attached?
      InstagramProfileEvent.broadcast_story_archive_refresh!(account: account)
    end
  end
  # Writes an HTML debug snapshot of a story to tmp/story_debug_snapshots
  # and records a "story_html_snapshot" event for it.
  #
  # Best effort: any error is logged and swallowed so snapshot capture never
  # fails the job. Does nothing when +story+ is blank.
  #
  # Changes from the previous version:
  # * every interpolated value is HTML-escaped, since story data (captions,
  #   URLs, usernames) is remote input and was embedded raw;
  # * filename parts are reduced to safe characters so a username or story
  #   id cannot introduce path separators;
  # * the "Story Index" line used +Array(story).size+, which counts the keys
  #   of the story hash rather than stories. The total is now shown only
  #   when the caller passes the optional +total_stories+.
  #
  # ERB::Util is part of the standard library's erb and is loaded by Rails.
  def capture_story_html_snapshot(profile:, story:, story_index:, total_stories: nil)
    return unless story.present?

    h = ->(value) { ERB::Util.html_escape(value.to_s) }
    safe_part = ->(value) { value.to_s.gsub(/[^A-Za-z0-9_.-]/, "_") }

    debug_dir = Rails.root.join("tmp", "story_debug_snapshots")
    FileUtils.mkdir_p(debug_dir)

    timestamp = Time.current.strftime("%Y%m%d_%H%M%S_%L")
    filename = "#{safe_part.call(profile.username)}_story_#{safe_part.call(story_index)}_#{safe_part.call(story[:story_id])}_#{timestamp}.html"
    filepath = File.join(debug_dir, filename)
    position = total_stories ? "#{story_index} / #{total_stories}" : story_index.to_s
    already_processed = already_processed_story?(profile: profile, story_id: story[:story_id].to_s)

    html_content = <<~HTML
      <!DOCTYPE html>
      <html>
      <head>
      <title>Story Debug Snapshot - #{h.call(profile.username)} - Story #{h.call(story_index)}</title>
      <style>
      body { font-family: Arial, sans-serif; margin: 20px; }
      .header { background: #f0f0f0; padding: 10px; border-radius: 5px; margin-bottom: 20px; }
      .metadata { background: #fff9e6; padding: 10px; border-radius: 5px; margin-bottom: 20px; }
      .analysis { background: #e6f3ff; padding: 10px; border-radius: 5px; margin-bottom: 20px; }
      .events { background: #ffe6e6; padding: 10px; border-radius: 5px; }
      pre { background: #f5f5f5; padding: 10px; border-radius: 3px; overflow-x: auto; }
      .story-id { color: #0066cc; font-weight: bold; }
      .skip-reason { color: #cc0000; font-weight: bold; }
      </style>
      </head>
      <body>
      <div class="header">
      <h1>Story Debug Snapshot</h1>
      <p><strong>Profile:</strong> #{h.call(profile.username)} (ID: #{h.call(profile.id)})</p>
      <p><strong>Story Index:</strong> #{h.call(position)}</p>
      <p><strong>Captured At:</strong> #{h.call(Time.current.iso8601)}</p>
      </div>
      <div class="metadata">
      <h2>Story Metadata</h2>
      <pre>#{h.call(JSON.pretty_generate(story))}</pre>
      </div>
      <div class="analysis">
      <h2>Processing Analysis</h2>
      <p><strong>Story ID:</strong> <span class="story-id">#{h.call(story[:story_id])}</span></p>
      <p><strong>Already Processed:</strong> #{h.call(already_processed)}</p>
      <p><strong>Media URL:</strong> #{h.call(story[:media_url])}</p>
      <p><strong>Taken At:</strong> #{h.call(story[:taken_at])}</p>
      <p><strong>Expiring At:</strong> #{h.call(story[:expiring_at])}</p>
      </div>
      <div class="events">
      <h2>Recent Story Events for this Profile</h2>
      <pre>#{h.call(JSON.pretty_generate(recent_story_events_for_debug(profile: profile)))}</pre>
      </div>
      </body>
      </html>
    HTML

    File.write(filepath, html_content)
    Rails.logger.info "[STORY_DEBUG] HTML snapshot created: #{filepath}"

    profile.record_event!(
      kind: "story_html_snapshot",
      external_id: "story_html_snapshot:#{story[:story_id]}:#{timestamp}",
      occurred_at: Time.current,
      metadata: base_story_metadata(profile: profile, story: story).merge(
        snapshot_filename: filename,
        snapshot_path: filepath,
        story_index: story_index,
        captured_at: Time.current.iso8601
      )
    )
  rescue StandardError => e
    # Snapshot capture is diagnostic only; never fail the job because of it.
    Rails.logger.error "[STORY_DEBUG] Failed to capture HTML snapshot: #{e.message}"
  end
  # Summaries of the profile's 20 most recent story events (by occurred_at)
  # for the debug snapshot, with metadata trimmed to a few diagnostic keys.
  def recent_story_events_for_debug(profile:)
    debug_kinds = [ "story_uploaded", "story_viewed", "story_analyzed", "story_skipped_debug" ]
    recent = profile.instagram_profile_events
      .where(kind: debug_kinds)
      .order(occurred_at: :desc, id: :desc)
      .limit(20)

    recent.map do |event|
      trimmed_metadata =
        if event.metadata.is_a?(Hash)
          event.metadata.slice("story_id", "skip_reason", "force_analyze_all", "story_index", "total_stories")
        else
          {}
        end

      {
        id: event.id,
        kind: event.kind,
        external_id: event.external_id,
        occurred_at: event.occurred_at&.iso8601,
        metadata: trimmed_metadata
      }
    end
  end
  991. end

app/jobs/sync_next_profiles_for_account_job.rb

0.0% lines covered

100.0% branches covered

43 relevant lines. 0 lines covered and 43 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Enqueues a profile-details fetch for the account's next profiles due for a sync.
  #
  # Profiles are ordered by least recently synced, then most recently
  # active, then username; never-synced profiles sort first.
  class SyncNextProfilesForAccountJob < ApplicationJob
    queue_as :profiles

    # @param instagram_account_id [Integer] id of the account whose profiles are synced
    # @param limit [Integer] number of profiles to enqueue, clamped to 1..50
    #
    # The notification reports the number of jobs that were actually
    # enqueued; previously it reported the number of selected profiles even
    # when some enqueues had failed.
    def perform(instagram_account_id:, limit: 10)
      account = InstagramAccount.find(instagram_account_id)
      cap = limit.to_i.clamp(1, 50)

      profiles = account.instagram_profiles
        .order(Arel.sql("COALESCE(last_synced_at, '1970-01-01') ASC, COALESCE(last_active_at, '1970-01-01') DESC, username ASC"))
        .limit(cap)

      enqueued_count = profiles.to_a.count do |profile|
        enqueue_profile_sync(account: account, profile: profile, cap: cap)
      end

      Turbo::StreamsChannel.broadcast_append_to(
        account,
        target: "notifications",
        partial: "shared/notification",
        locals: { kind: "notice", message: "Queued profile sync for next #{enqueued_count} profiles." }
      )
    end

    private

    # Creates the action log and enqueues the fetch job for one profile.
    #
    # Returns true when the job was enqueued. Failures are logged and do not
    # stop the batch. When the job could not be enqueued, the log row that
    # was already created is marked failed instead of staying "queued".
    def enqueue_profile_sync(account:, profile:, cap:)
      log = nil
      job = nil

      log = profile.instagram_profile_action_logs.create!(
        instagram_account: account,
        action: "fetch_profile_details",
        status: "queued",
        trigger_source: "account_sync_next_profiles",
        occurred_at: Time.current,
        metadata: { requested_by: self.class.name, limit: cap }
      )
      job = FetchInstagramProfileDetailsJob.perform_later(
        instagram_account_id: account.id,
        instagram_profile_id: profile.id,
        profile_action_log_id: log.id
      )
      log.update!(active_job_id: job.job_id, queue_name: job.queue_name)
      true
    rescue StandardError => e
      Ops::StructuredLogger.warn(
        event: "sync_next_profiles.profile_enqueue_failed",
        payload: {
          account_id: account.id,
          profile_id: profile.id,
          error_class: e.class.name,
          error_message: e.message
        }
      )
      # perform_later yields a job object on success; anything else means nothing was queued.
      enqueued = job.respond_to?(:job_id)
      mark_enqueue_failed(log: log, error: e) if log && !enqueued
      enqueued
    end

    # Best-effort: flags the orphaned log as failed; an error here is only logged.
    def mark_enqueue_failed(log:, error:)
      log.mark_failed!(error_message: error.message, extra_metadata: { requested_by: self.class.name })
    rescue StandardError => e
      Rails.logger.warn("[SyncNextProfilesForAccountJob] could not mark action log failed log_id=#{log&.id}: #{e.class}: #{e.message}")
    end
  end

app/jobs/sync_profile_stories_for_account_job.rb

0.0% lines covered

100.0% branches covered

76 relevant lines. 0 lines covered and 76 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Queues SyncInstagramProfileStoriesJob for a batch of the account's profiles,
  # ordered by oldest last_story_seen_at, then most recently active, then username.
  class SyncProfileStoriesForAccountJob < ApplicationJob
    queue_as :story_downloads

    STORY_BATCH_LIMIT = 10
    STORIES_PER_PROFILE = SyncInstagramProfileStoriesJob::MAX_STORIES

    # @param instagram_account_id [Integer] id looked up with InstagramAccount.find
    # @param story_limit [#to_i] number of profiles to queue; clamped to 1..STORY_BATCH_LIMIT
    # @param stories_per_profile [#to_i] per-profile cap; clamped to 1..STORIES_PER_PROFILE
    # @param with_comments [Object] boolean-ish; queues the "auto_story_reply" action when true
    # @param require_auto_reply_tag [Object] boolean-ish; restricts to profiles carrying an auto-reply tag
    # @param force_analyze_all [Object] boolean-ish; forwarded to the per-profile job
    # @raise [ActiveRecord::RecordNotFound] when the account does not exist
    def perform(
      instagram_account_id:,
      story_limit: STORY_BATCH_LIMIT,
      stories_per_profile: STORIES_PER_PROFILE,
      with_comments: false,
      require_auto_reply_tag: false,
      force_analyze_all: false
    )
      account = InstagramAccount.find(instagram_account_id)
      limit = story_limit.to_i.clamp(1, STORY_BATCH_LIMIT)
      # Same bound as before, expressed through this class's own constant.
      stories_per_profile_i = stories_per_profile.to_i.clamp(1, STORIES_PER_PROFILE)

      boolean = ActiveModel::Type::Boolean.new
      auto_reply = boolean.cast(with_comments)
      require_tag = boolean.cast(require_auto_reply_tag)
      force_analyze = boolean.cast(force_analyze_all)

      scope = account.instagram_profiles
        .order(Arel.sql("COALESCE(last_story_seen_at, '1970-01-01') ASC, COALESCE(last_active_at, '1970-01-01') DESC, username ASC"))

      if require_tag
        tagged_profiles = account.instagram_profiles
          .joins(:profile_tags)
          .where(profile_tags: { name: [ "automatic_reply", "automatic reply", "auto_reply", "auto reply" ] })
          .select(:id)
        scope = scope.where(id: tagged_profiles)
      end

      profiles = scope.limit(limit)

      # These two values do not vary per profile, so compute them once.
      action = auto_reply ? "auto_story_reply" : "sync_stories"
      trigger_source = auto_reply ? "account_sync_stories_with_comments" : "account_sync_profile_stories"

      # Count only successful enqueues so the notification does not overstate
      # what was queued when some profiles failed and were rescued.
      queued_count = 0

      profiles.each do |profile|
        log = profile.instagram_profile_action_logs.create!(
          instagram_account: account,
          action: action,
          status: "queued",
          trigger_source: trigger_source,
          occurred_at: Time.current,
          metadata: {
            requested_by: self.class.name,
            story_limit: limit,
            max_stories_per_profile: stories_per_profile_i,
            auto_reply: auto_reply,
            require_auto_reply_tag: require_tag,
            force_analyze_all: force_analyze
          }
        )

        job = SyncInstagramProfileStoriesJob.perform_later(
          instagram_account_id: account.id,
          instagram_profile_id: profile.id,
          profile_action_log_id: log.id,
          max_stories: stories_per_profile_i,
          force_analyze_all: force_analyze,
          auto_reply: auto_reply,
          require_auto_reply_tag: require_tag
        )

        log.update!(active_job_id: job.job_id, queue_name: job.queue_name)
        queued_count += 1
      rescue StandardError => e
        # One failing profile must not stop the rest of the batch.
        Ops::StructuredLogger.warn(
          event: "sync_profile_stories.profile_enqueue_failed",
          payload: {
            account_id: account.id,
            profile_id: profile.id,
            error_class: e.class.name,
            error_message: e.message
          }
        )
        next
      end

      label = auto_reply ? "story sync with auto-reply" : "story sync"
      Turbo::StreamsChannel.broadcast_append_to(
        account,
        target: "notifications",
        partial: "shared/notification",
        locals: { kind: "notice", message: "Queued #{label} for #{queued_count} stories (max #{STORY_BATCH_LIMIT})." }
      )
    end
  end

app/jobs/sync_recent_profile_posts_for_profile_job.rb

0.0% lines covered

100.0% branches covered

307 relevant lines. 0 lines covered and 307 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "set"
  2. class SyncRecentProfilePostsForProfileJob < ApplicationJob
  # Raised by perform (via normalize_retryable_error) in place of RuntimeErrors
  # whose message looks like rate limiting or a temporary block.
  3. class TransientProfileScanError < StandardError; end
  4. queue_as :post_downloads
  # Tag names attached by apply_scan_tags!: VISITED_TAG on every completed scan,
  # ANALYZED_TAG only when the scan found new recent posts.
  5. VISITED_TAG = "profile_posts_scanned".freeze
  6. ANALYZED_TAG = "profile_posts_analyzed".freeze
  # New posts older than this many days are not enqueued for analysis.
  7. MAX_POST_AGE_DAYS = 5
  # First key of the two-key Postgres advisory lock; the second key is the profile id.
  8. PROFILE_SCAN_LOCK_NAMESPACE = 92_347
  # Retry rules for network-level failures and the transient wrapper error.
  9. retry_on Net::OpenTimeout, Net::ReadTimeout, wait: :polynomially_longer, attempts: 4
  10. retry_on Errno::ECONNREFUSED, Errno::ECONNRESET, wait: :polynomially_longer, attempts: 4
  11. retry_on Timeout::Error, wait: :polynomially_longer, attempts: 3
  12. retry_on TransientProfileScanError, wait: :polynomially_longer, attempts: 3
  # Resolved by name so the retry rule is only registered when the Selenium
  # constant is defined; safe_constantize returns nil otherwise.
  13. selenium_timeout_error = "Selenium::WebDriver::Error::TimeoutError".safe_constantize
  14. retry_on selenium_timeout_error, wait: :polynomially_longer, attempts: 2 if selenium_timeout_error
  # Scans one profile: refreshes story activity, applies the scan policy,
  # collects the latest posts and enqueues analysis for new posts taken within
  # MAX_POST_AGE_DAYS.
  #
  # posts_limit is clamped to 1..3 and comments_limit to 1..20.
  # Returns early (without creating an action log) when the per-profile lock
  # cannot be claimed. Any StandardError is normalized, recorded on the action
  # log when one exists, and re-raised.
  15. def perform(instagram_account_id:, instagram_profile_id:, posts_limit: 3, comments_limit: 8)
  16. account = InstagramAccount.find(instagram_account_id)
  17. profile = account.instagram_profiles.find(instagram_profile_id)
  18. posts_limit_i = posts_limit.to_i.clamp(1, 3)
  19. comments_limit_i = comments_limit.to_i.clamp(1, 20)
  # Skip when another execution already holds the lock for this profile.
  20. lock_acquired = claim_profile_scan_lock!(profile_id: profile.id)
  21. unless lock_acquired
  22. Ops::StructuredLogger.info(
  23. event: "profile_scan.skipped_duplicate_execution",
  24. payload: {
  25. active_job_id: job_id,
  26. instagram_account_id: account.id,
  27. instagram_profile_id: profile.id
  28. }
  29. )
  30. return
  31. end
  32. action_log = profile.instagram_profile_action_logs.create!(
  33. instagram_account: account,
  34. action: "analyze_profile",
  35. status: "queued",
  36. trigger_source: "recurring_profile_recent_posts_scan",
  37. occurred_at: Time.current,
  38. active_job_id: job_id,
  39. queue_name: queue_name,
  40. metadata: { posts_limit: posts_limit_i, comments_limit: comments_limit_i }
  41. )
  42. action_log.mark_running!(extra_metadata: { active_job_id: job_id, queue_name: queue_name })
  # The story fetch can come back degraded: an empty dataset plus a warning
  # hash describing the network error that was tolerated.
  43. story_result = fetch_story_dataset_with_fallback(account: account, profile: profile)
  44. story_dataset = story_result[:dataset]
  45. story_fetch_warning = story_result[:warning]
  46. update_story_activity!(profile: profile, story_dataset: story_dataset)
  47. policy_decision = Instagram::ProfileScanPolicy.new(profile: profile, profile_details: story_dataset[:profile]).decision
  # A policy skip is recorded as a successful (skipped) run, not as a failure.
  48. if policy_decision[:skip_scan]
  49. handle_policy_skip!(
  50. account: account,
  51. profile: profile,
  52. action_log: action_log,
  53. decision: policy_decision,
  54. story_dataset: story_dataset,
  55. story_fetch_warning: story_fetch_warning
  56. )
  57. return
  58. end
  59. Instagram::ProfileScanPolicy.clear_scan_excluded!(profile: profile)
  # Snapshot known shortcodes before collecting, so posts persisted by the
  # collector can be told apart from posts that already existed.
  60. existing_shortcodes = profile.instagram_profile_posts.pluck(:shortcode).to_set
  61. collected = Instagram::ProfileAnalysisCollector.new(account: account, profile: profile).collect_and_persist!(
  62. posts_limit: posts_limit_i,
  63. comments_limit: comments_limit_i
  64. )
  65. persisted_posts = Array(collected[:posts])
  66. feed_fetch = collected.dig(:summary, :feed_fetch)
  67. new_posts = persisted_posts.reject { |post| existing_shortcodes.include?(post.shortcode) }
  68. recent_cutoff = MAX_POST_AGE_DAYS.days.ago
  # Posts without a taken_at timestamp are treated as stale.
  69. new_recent_posts = new_posts.select { |post| post.taken_at.present? && post.taken_at >= recent_cutoff }
  70. analysis_enqueue_failures = 0
  # Enqueue failures are counted and logged per post; they do not abort the scan.
  71. new_recent_posts.each do |post|
  72. post.update!(ai_status: "pending") if post.ai_status == "failed"
  73. AnalyzeInstagramProfilePostJob.perform_later(
  74. instagram_account_id: account.id,
  75. instagram_profile_id: profile.id,
  76. instagram_profile_post_id: post.id,
  77. task_flags: {
  78. generate_comments: true,
  79. enforce_comment_evidence_policy: true,
  80. retry_on_incomplete_profile: true
  81. }
  82. )
  83. rescue StandardError => enqueue_error
  84. analysis_enqueue_failures += 1
  85. Rails.logger.warn(
  86. "[SyncRecentProfilePostsForProfileJob] analyze enqueue failed for profile_post_id=#{post.id} " \
  87. "(profile_id=#{profile.id}): #{enqueue_error.class}: #{enqueue_error.message}"
  88. )
  89. next
  90. end
  91. apply_scan_tags!(profile: profile, has_new_posts: new_recent_posts.any?)
  92. profile.update!(last_synced_at: Time.current, ai_last_analyzed_at: Time.current)
  # NOTE(review): new_posts_enqueued_for_analysis counts every new recent post,
  # including those whose enqueue failed (reported separately below).
  93. profile.record_event!(
  94. kind: "profile_recent_posts_scanned",
  95. external_id: "profile_recent_posts_scanned:#{Time.current.utc.iso8601(6)}",
  96. occurred_at: Time.current,
  97. metadata: {
  98. source: "recurring_profile_recent_posts_scan",
  99. stories_detected: Array(story_dataset[:stories]).length,
  100. latest_posts_fetched: persisted_posts.length,
  101. new_posts_enqueued_for_analysis: new_recent_posts.length,
  102. stale_posts_skipped_from_analysis: (new_posts.length - new_recent_posts.length),
  103. analysis_enqueue_failures: analysis_enqueue_failures,
  104. story_dataset_degraded: story_fetch_warning[:degraded],
  105. story_dataset_error_class: story_fetch_warning[:error_class],
  106. story_dataset_error_message: story_fetch_warning[:error_message]
  107. }
  108. )
  109. action_log.mark_succeeded!(
  110. extra_metadata: {
  111. stories_detected: Array(story_dataset[:stories]).length,
  112. fetched_posts: persisted_posts.length,
  113. new_posts: new_recent_posts.length,
  114. stale_posts_skipped_from_analysis: (new_posts.length - new_recent_posts.length),
  115. analysis_enqueue_failures: analysis_enqueue_failures,
  116. feed_fetch: feed_fetch.is_a?(Hash) ? feed_fetch : {},
  117. story_dataset_degraded: story_fetch_warning[:degraded],
  118. story_dataset_error_class: story_fetch_warning[:error_class],
  119. story_dataset_error_message: story_fetch_warning[:error_message]
  120. },
  121. log_text: "Scanned latest #{posts_limit_i} posts. New recent posts queued: #{new_recent_posts.length}, stale skipped: #{new_posts.length - new_recent_posts.length}, analysis enqueue failures: #{analysis_enqueue_failures}."
  122. )
  123. rescue StandardError => e
  # The error may be wrapped (authentication / transient) before being
  # re-raised; the wrapped class is what the retry_on rules see.
  124. normalized_error = normalize_job_error(e)
  # action_log is nil when the failure happened before the log was created.
  125. action_log&.mark_failed!(
  126. error_message: normalized_error.message,
  127. extra_metadata: {
  128. active_job_id: job_id,
  129. executions: executions,
  130. error_class: normalized_error.class.name
  131. }
  132. )
  133. raise normalized_error
  134. ensure
  # Only release a lock this execution claimed.
  135. release_profile_scan_lock!(profile_id: profile.id) if lock_acquired
  136. end
  137. private
  # Fetches the profile's story dataset (up to 3 stories). When the fetch fails
  # with an error accepted by story_fetch_degradable_error?, an empty dataset
  # is returned together with a warning describing the error; any other error
  # is re-raised.
  #
  # @return [Hash] { dataset: Hash, warning: { degraded:, error_class:, error_message: } }
  def fetch_story_dataset_with_fallback(account:, profile:)
    client = Instagram::Client.new(account: account)
    fetched = client.fetch_profile_story_dataset!(username: profile.username, stories_limit: 3)
    healthy_warning = { degraded: false, error_class: nil, error_message: nil }

    { dataset: fetched, warning: healthy_warning }
  rescue StandardError => e
    raise unless story_fetch_degradable_error?(e)

    Rails.logger.warn(
      "[SyncRecentProfilePostsForProfileJob] degraded story fetch for profile_id=#{profile.id} " \
      "(account_id=#{account.id}): #{e.class}: #{e.message}"
    )

    # Placeholder with the same keys the rest of the job reads from a real dataset.
    empty_dataset = { profile: {}, user_id: nil, stories: [], fetched_at: Time.current }
    degraded_warning = { degraded: true, error_class: e.class.name, error_message: e.message.to_s }

    { dataset: empty_dataset, warning: degraded_warning }
  end
  # True when the error is one of the network timeout / connection errors for
  # which the story fetch may fall back to an empty dataset.
  #
  # @param error [Exception]
  # @return [Boolean]
  def story_fetch_degradable_error?(error)
    degradable_classes = [
      Net::OpenTimeout,
      Net::ReadTimeout,
      Errno::ECONNREFUSED,
      Errno::ECONNRESET,
      Timeout::Error
    ]
    degradable_classes.any? { |klass| error.is_a?(klass) }
  end
  # Maps a raised error to the error that perform re-raises: the authentication
  # normalization wins when it applies, otherwise the retryable normalization
  # decides (which may return the error unchanged).
  def normalize_job_error(error)
    normalize_authentication_error(error) || normalize_retryable_error(error)
  end
  # Returns an Instagram::AuthenticationRequiredError for errors that indicate
  # missing or invalid authentication, or nil when the error is unrelated.
  #
  # An existing AuthenticationRequiredError is returned as-is. A RuntimeError
  # whose message contains one of the known phrases is wrapped, keeping the
  # original message and backtrace.
  def normalize_authentication_error(error)
    return error if error.is_a?(Instagram::AuthenticationRequiredError)
    return nil unless error.is_a?(RuntimeError)

    lowered = error.message.to_s.downcase
    auth_phrases = [
      "stored cookies are not authenticated",
      "authentication required",
      "no stored cookies"
    ]
    return nil if auth_phrases.none? { |phrase| lowered.include?(phrase) }

    Instagram::AuthenticationRequiredError.new(error.message.to_s).tap do |auth_error|
      auth_error.set_backtrace(error.backtrace)
    end
  end
  # Wraps errors recognised by transient_runtime_error? in a
  # TransientProfileScanError (original backtrace preserved); every other
  # error is returned untouched.
  def normalize_retryable_error(error)
    if transient_runtime_error?(error)
      TransientProfileScanError.new("Transient upstream response failure: #{error.message}").tap do |transient|
        transient.set_backtrace(error.backtrace)
      end
    else
      error
    end
  end
  # True for RuntimeErrors whose message (case-insensitive) mentions rate
  # limiting or a temporary block.
  #
  # @param error [Exception]
  # @return [Boolean]
  def transient_runtime_error?(error)
    return false unless error.is_a?(RuntimeError)

    lowered = error.message.to_s.downcase
    transient_phrases = [ "http 429", "too many requests", "rate limit", "temporarily blocked" ]
    transient_phrases.any? { |phrase| lowered.include?(phrase) }
  end
  # Tries to take the advisory lock for the profile without blocking.
  # Returns true when the lock was obtained, and also (fail-open) when the
  # adapter is not Postgres or the lock query itself raised.
  #
  # @param profile_id [Integer]
  # @return [Boolean, nil] cast result of pg_try_advisory_lock
  def claim_profile_scan_lock!(profile_id:)
    if postgres_adapter?
      # Advisory lock keeps at most one scan worker active per profile id.
      namespace_key, profile_key = profile_scan_lock_keys(profile_id: profile_id)
      lock_sql = "SELECT pg_try_advisory_lock(#{namespace_key}, #{profile_key})"
      raw_result = ActiveRecord::Base.connection.select_value(lock_sql)
      ActiveModel::Type::Boolean.new.cast(raw_result)
    else
      true
    end
  rescue StandardError => e
    Rails.logger.warn("[SyncRecentProfilePostsForProfileJob] lock claim failed for profile_id=#{profile_id}: #{e.class}: #{e.message}")
    true
  end
  # Releases the advisory lock for the profile. Does nothing on non-Postgres
  # adapters; a failing unlock query is logged and swallowed.
  #
  # @param profile_id [Integer]
  # @return [Object, nil] the raw pg_advisory_unlock value, or nil
  def release_profile_scan_lock!(profile_id:)
    if postgres_adapter?
      namespace_key, profile_key = profile_scan_lock_keys(profile_id: profile_id)
      unlock_sql = "SELECT pg_advisory_unlock(#{namespace_key}, #{profile_key})"
      ActiveRecord::Base.connection.select_value(unlock_sql)
    end
  rescue StandardError => e
    Rails.logger.warn("[SyncRecentProfilePostsForProfileJob] lock release failed for profile_id=#{profile_id}: #{e.class}: #{e.message}")
    nil
  end
  # Builds the two integer keys used for the advisory lock: the fixed
  # namespace followed by the profile id coerced with to_i.
  #
  # @return [Array(Integer, Integer)]
  def profile_scan_lock_keys(profile_id:)
    profile_key = profile_id.to_i
    [ PROFILE_SCAN_LOCK_NAMESPACE, profile_key ]
  end
  # True when the current ActiveRecord adapter name contains "postgres"
  # (case-insensitive). Any error while asking the connection counts as false.
  #
  # @return [Boolean]
  def postgres_adapter?
    adapter_label = ActiveRecord::Base.connection.adapter_name.to_s
    adapter_label.downcase.include?("postgres")
  rescue StandardError
    false
  end
  # Copies profile details from the story dataset onto the profile (keeping the
  # existing value whenever the dataset value is blank), records a "story_seen"
  # event when stories were detected, then recomputes activity and saves.
  #
  # @param profile [InstagramProfile]
  # @param story_dataset [Hash] reads :stories and :profile
  def update_story_activity!(profile:, story_dataset:)
    detected_stories = Array(story_dataset[:stories])
    raw_details = story_dataset[:profile]
    details = raw_details.is_a?(Hash) ? raw_details : {}

    profile.display_name = details[:display_name].presence || profile.display_name
    profile.profile_pic_url = details[:profile_pic_url].presence || profile.profile_pic_url
    profile.ig_user_id = details[:ig_user_id].presence || profile.ig_user_id
    profile.bio = details[:bio].presence || profile.bio
    profile.followers_count = normalize_count(details[:followers_count]) || profile.followers_count
    profile.last_post_at = details[:last_post_at].presence || profile.last_post_at

    if detected_stories.any?
      # Newest story timestamp; falls back to the current time when no story carries one.
      story_times = detected_stories.filter_map { |entry| entry[:taken_at] }
      newest_story_at = story_times.max || Time.current

      profile.last_story_seen_at = newest_story_at
      profile.record_event!(
        kind: "story_seen",
        external_id: "story_seen:profile_scan:#{profile.username}:#{newest_story_at.to_i}",
        occurred_at: newest_story_at,
        metadata: {
          source: "recurring_profile_recent_posts_scan",
          stories_detected: detected_stories.length
        }
      )
    end

    profile.recompute_last_active!
    profile.save!
  end
  # Converts a value to an Integer when its stripped string form consists only
  # of digits; returns nil for anything else (including nil and "").
  #
  # @param value [Object]
  # @return [Integer, nil]
  def normalize_count(value)
    digits = value.to_s.strip
    digits.match?(/\A\d+\z/) ? digits.to_i : nil
  rescue StandardError
    nil
  end
  # Records a scan that the policy decided to skip: optionally marks the
  # profile as scan-excluded, stamps last_synced_at, writes a
  # "profile_recent_posts_scan_skipped" event and marks the action log as
  # succeeded with skip details.
  def handle_policy_skip!(account:, profile:, action_log:, decision:, story_dataset:, story_fetch_warning:)
    reason_code = decision[:reason_code].to_s
    excluding_codes = [ "non_personal_profile_page", "scan_excluded_tag" ]
    Instagram::ProfileScanPolicy.mark_scan_excluded!(profile: profile) if excluding_codes.include?(reason_code)

    profile.update!(last_synced_at: Time.current)

    # Entries shared by the event metadata and the action-log metadata, in the
    # order they appear after each record's own leading keys.
    shared_details = {
      followers_count: decision[:followers_count],
      max_followers: decision[:max_followers],
      stories_detected: Array(story_dataset[:stories]).length,
      story_dataset_degraded: story_fetch_warning[:degraded],
      story_dataset_error_class: story_fetch_warning[:error_class],
      story_dataset_error_message: story_fetch_warning[:error_message]
    }

    event_metadata = {
      source: "recurring_profile_recent_posts_scan",
      reason_code: reason_code,
      reason: decision[:reason]
    }.merge(shared_details)

    profile.record_event!(
      kind: "profile_recent_posts_scan_skipped",
      external_id: "profile_recent_posts_scan_skipped:#{Time.current.utc.iso8601(6)}",
      occurred_at: Time.current,
      metadata: event_metadata
    )

    log_metadata = {
      skipped: true,
      skip_reason_code: reason_code,
      skip_reason: decision[:reason]
    }.merge(shared_details)

    action_log.mark_succeeded!(
      extra_metadata: log_metadata,
      log_text: "Skipped profile scan: #{decision[:reason]}"
    )
  end
  # Ensures the profile carries the "scanned" tag, and additionally the
  # "analyzed" tag when the scan produced new posts. Tags are created on
  # demand and never attached twice.
  def apply_scan_tags!(profile:, has_new_posts:)
    attach_tag = lambda do |tag_name|
      tag = ProfileTag.find_or_create_by!(name: tag_name)
      profile.profile_tags << tag unless profile.profile_tags.exists?(id: tag.id)
    end

    attach_tag.call(VISITED_TAG)
    attach_tag.call(ANALYZED_TAG) if has_new_posts
  end
  308. end

app/jobs/verify_instagram_messageability_job.rb

0.0% lines covered

100.0% branches covered

57 relevant lines. 0 lines covered and 57 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Checks whether the account can message a profile, stores the outcome on the
  # profile, notifies the account's notification stream and records the result
  # on a profile action log.
  class VerifyInstagramMessageabilityJob < ApplicationJob
    queue_as :profiles

    # @param instagram_account_id [Integer] id looked up with InstagramAccount.find
    # @param instagram_profile_id [Integer] id looked up within the account's profiles
    # @param profile_action_log_id [Integer, nil] existing log to reuse; a new one is created otherwise
    # @raise [StandardError] re-raises whatever failed, after recording the failure
    def perform(instagram_account_id:, instagram_profile_id:, profile_action_log_id: nil)
      account = InstagramAccount.find(instagram_account_id)
      profile = account.instagram_profiles.find(instagram_profile_id)
      action_log = find_or_create_action_log(
        account: account,
        profile: profile,
        action: "verify_messageability",
        profile_action_log_id: profile_action_log_id
      )
      action_log.mark_running!(extra_metadata: { queue_name: queue_name, active_job_id: job_id })

      result = Instagram::Client.new(account: account).verify_messageability!(username: profile.username)

      profile.update!(
        can_message: result[:can_message],
        restriction_reason: result[:restriction_reason],
        dm_interaction_state: result[:dm_state].to_s.presence || (result[:can_message] ? "messageable" : "unavailable"),
        dm_interaction_reason: result[:dm_reason].to_s.presence || result[:restriction_reason].to_s,
        dm_interaction_checked_at: Time.current,
        dm_interaction_retry_after_at: result[:dm_retry_after_at]
      )

      Turbo::StreamsChannel.broadcast_append_to(
        account,
        target: "notifications",
        partial: "shared/notification",
        locals: { kind: "notice", message: "Messageability for #{profile.username}: #{result[:can_message] ? 'Yes' : 'No'}." }
      )

      action_log.mark_succeeded!(
        extra_metadata: result,
        log_text: "Messageability result: #{result[:can_message] ? 'Yes' : 'No'}"
      )
    rescue StandardError => e
      # Persist the failure first, so a problem while broadcasting cannot leave
      # the action log un-failed or replace the original error.
      action_log&.mark_failed!(error_message: e.message, extra_metadata: { active_job_id: job_id })

      # account is nil when the account lookup itself raised; there is no
      # stream to notify in that case.
      if account
        Turbo::StreamsChannel.broadcast_append_to(
          account,
          target: "notifications",
          partial: "shared/notification",
          locals: { kind: "alert", message: "Messageability check failed: #{e.message}" }
        )
      end

      raise
    end

    private

    # Returns the profile's action log with the given id when it exists,
    # otherwise creates a fresh "queued" log for this job.
    def find_or_create_action_log(account:, profile:, action:, profile_action_log_id:)
      log = profile.instagram_profile_action_logs.find_by(id: profile_action_log_id) if profile_action_log_id.present?
      return log if log

      profile.instagram_profile_action_logs.create!(
        instagram_account: account,
        action: action,
        status: "queued",
        trigger_source: "job",
        occurred_at: Time.current,
        active_job_id: job_id,
        queue_name: queue_name,
        metadata: { created_by: self.class.name }
      )
    end
  end

app/jobs/workspace_process_actions_todo_post_job.rb

0.0% lines covered

100.0% branches covered

450 relevant lines. 0 lines covered and 450 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. class WorkspaceProcessActionsTodoPostJob < ApplicationJob
  2. queue_as :ai
  # Reuses ProcessPostMetadataTaggingJob's list when that constant is defined;
  # otherwise falls back to the literal list below.
  3. PROFILE_INCOMPLETE_REASON_CODES =
  4. if defined?(ProcessPostMetadataTaggingJob::PROFILE_INCOMPLETE_REASON_CODES)
  5. ProcessPostMetadataTaggingJob::PROFILE_INCOMPLETE_REASON_CODES
  6. else
  7. %w[
  8. latest_posts_not_analyzed
  9. insufficient_analyzed_posts
  10. no_recent_posts_available
  11. missing_structured_post_signals
  12. profile_preparation_failed
  13. profile_preparation_error
  14. ].freeze
  15. end
  # Tunables read from the environment once at class load; each value is
  # clamped to the range given in its clamp call.
  16. PROFILE_RETRY_MAX_ATTEMPTS = ENV.fetch("WORKSPACE_ACTIONS_PROFILE_RETRY_MAX_ATTEMPTS", 4).to_i.clamp(1, 12)
  # Delay before re-running while waiting for post analysis (also the default in schedule_retry!).
  17. POST_RETRY_WAIT_MINUTES = ENV.fetch("WORKSPACE_ACTIONS_POST_RETRY_WAIT_MINUTES", 20).to_i.clamp(5, 180)
  # Delay before re-running while waiting for a media download.
  18. MEDIA_RETRY_WAIT_MINUTES = ENV.fetch("WORKSPACE_ACTIONS_MEDIA_RETRY_WAIT_MINUTES", 10).to_i.clamp(2, 90)
  # Minimum gap between two unforced, unscheduled enqueues for the same post.
  19. ENQUEUE_COOLDOWN_SECONDS = ENV.fetch("WORKSPACE_ACTIONS_ENQUEUE_COOLDOWN_SECONDS", 180).to_i.clamp(15, 1800)
  # How far in the future mark_running! sets the post's lock_until.
  20. RUNNING_LOCK_SECONDS = ENV.fetch("WORKSPACE_ACTIONS_RUNNING_LOCK_SECONDS", 600).to_i.clamp(60, 3600)
  # Enqueues this job for a post unless the post's persisted
  # "workspace_actions" state says it is unnecessary (suggestions already
  # present, retry already scheduled, run in progress, or enqueued too
  # recently). `force` bypasses all of those guards; a Time in `wait_until`
  # schedules the job and bypasses the scheduled-retry and cooldown guards.
  #
  # Never raises: any StandardError is reported as reason "enqueue_failed".
  #
  # @return [Hash] always contains :enqueued and :reason
  def self.enqueue_if_needed!(account:, profile:, post:, requested_by:, wait_until: nil, force: false)
    return { enqueued: false, reason: "post_missing" } unless account && profile && post

    current_time = Time.current
    bypass_guards = ActiveModel::Type::Boolean.new.cast(force)
    run_at = wait_until.is_a?(Time) ? wait_until : nil

    # Persisted queue state is row-local; lock to prevent duplicate enqueue races.
    post.with_lock do
      post_metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
      existing_state = post_metadata["workspace_actions"]
      queue_state = existing_state.is_a?(Hash) ? existing_state.deep_dup : {}

      unless bypass_guards
        return { enqueued: false, reason: "already_ready" } if normalized_suggestions(post).any?

        pending_run_at = parse_time(queue_state["next_run_at"])
        if run_at.nil? && pending_run_at.present? && pending_run_at > current_time
          return { enqueued: false, reason: "retry_already_scheduled", next_run_at: pending_run_at.iso8601 }
        end

        locked_until = parse_time(queue_state["lock_until"])
        if locked_until.present? && locked_until > current_time
          return { enqueued: false, reason: "already_running", lock_until: locked_until.iso8601 }
        end

        previous_enqueue_at = parse_time(queue_state["last_enqueued_at"])
        if run_at.nil? && previous_enqueue_at.present? && (current_time - previous_enqueue_at) < ENQUEUE_COOLDOWN_SECONDS
          return { enqueued: false, reason: "enqueue_cooldown_active" }
        end
      end

      job_arguments = {
        instagram_account_id: account.id,
        instagram_profile_id: profile.id,
        instagram_profile_post_id: post.id,
        requested_by: requested_by.to_s
      }
      enqueuer = run_at.present? ? set(wait_until: run_at) : self
      job = enqueuer.perform_later(**job_arguments)

      stamp = current_time.iso8601(3)
      queue_state.merge!(
        "status" => "queued",
        "requested_by" => requested_by.to_s.presence || "workspace",
        "job_id" => job.job_id,
        "queue_name" => job.queue_name,
        "last_enqueued_at" => stamp,
        "last_error" => nil,
        "next_run_at" => run_at&.iso8601(3),
        "updated_at" => stamp,
        "source" => name
      )
      post_metadata["workspace_actions"] = queue_state
      post.update!(metadata: post_metadata)

      {
        enqueued: true,
        reason: run_at.present? ? "scheduled" : "queued",
        job_id: job.job_id,
        queue_name: job.queue_name,
        next_run_at: run_at&.iso8601(3)
      }
    end
  rescue StandardError => e
    {
      enqueued: false,
      reason: "enqueue_failed",
      error_class: e.class.name,
      error_message: e.message.to_s
    }
  end
  # Drives one post towards having comment suggestions. Each step either
  # finishes the run (recording a status under metadata["workspace_actions"])
  # or schedules a later retry while a prerequisite is still pending:
  # skip checks -> media attached -> post analysed -> suggestions present ->
  # comment generation. Any StandardError is recorded as status "failed" and
  # re-raised.
  87. def perform(instagram_account_id:, instagram_profile_id:, instagram_profile_post_id:, requested_by: "workspace")
  88. account = InstagramAccount.find(instagram_account_id)
  89. profile = account.instagram_profiles.find(instagram_profile_id)
  90. post = profile.instagram_profile_posts.find(instagram_profile_post_id)
  # Terminal skips: these record a status and never schedule a retry.
  91. unless user_created_post?(post)
  92. persist_workspace_state!(post: post, status: "skipped_non_user_post", requested_by: requested_by, next_run_at: nil)
  93. return
  94. end
  95. policy_decision = Instagram::ProfileScanPolicy.new(profile: profile).decision
  96. if ActiveModel::Type::Boolean.new.cast(policy_decision[:skip_post_analysis])
  97. persist_workspace_state!(
  98. post: post,
  99. status: "skipped_page_profile",
  100. requested_by: requested_by,
  101. last_error: policy_decision[:reason].to_s,
  102. next_run_at: nil
  103. )
  104. return
  105. end
  106. if post_deleted_from_source?(post)
  107. persist_workspace_state!(post: post, status: "skipped_deleted_source", requested_by: requested_by, next_run_at: nil)
  108. return
  109. end
  # From here on the post is marked "running" with a lock_until deadline.
  110. mark_running!(post: post, requested_by: requested_by)
  111. ensure_video_preview_generation!(post: post)
  # Prerequisite 1: media must be attached; otherwise queue the download and retry later.
  112. unless post.media.attached?
  113. queue_media_download!(account: account, profile: profile, post: post)
  114. schedule_retry!(
  115. account: account,
  116. profile: profile,
  117. post: post,
  118. requested_by: requested_by,
  119. wait_until: Time.current + MEDIA_RETRY_WAIT_MINUTES.minutes,
  120. status: "waiting_media_download",
  121. last_error: nil
  122. )
  123. return
  124. end
  # Prerequisite 2: an analysis already in flight is only waited for, not re-queued.
  125. if post_analysis_running?(post)
  126. schedule_retry!(
  127. account: account,
  128. profile: profile,
  129. post: post,
  130. requested_by: requested_by,
  131. wait_until: Time.current + POST_RETRY_WAIT_MINUTES.minutes,
  132. status: "waiting_post_analysis",
  133. last_error: nil
  134. )
  135. return
  136. end
  # Not analysed and nothing in flight: queue the analysis, then retry later.
  137. unless post_analyzed?(post)
  138. queue_post_analysis!(account: account, profile: profile, post: post)
  139. schedule_retry!(
  140. account: account,
  141. profile: profile,
  142. post: post,
  143. requested_by: requested_by,
  144. wait_until: Time.current + POST_RETRY_WAIT_MINUTES.minutes,
  145. status: "waiting_post_analysis",
  146. last_error: nil
  147. )
  148. return
  149. end
  # Suggestions may already exist from the analysis; then there is nothing left to generate.
  150. suggestions = self.class.normalized_suggestions(post)
  151. if suggestions.any?
  152. persist_workspace_state!(
  153. post: post,
  154. status: "ready",
  155. requested_by: requested_by,
  156. suggestions_count: suggestions.length,
  157. next_run_at: nil
  158. )
  159. return
  160. end
  161. comment_result = Ai::PostCommentGenerationService.new(
  162. account: account,
  163. profile: profile,
  164. post: post,
  165. enforce_required_evidence: true
  166. ).run!
  # Reload before re-reading suggestions so values written by the service are seen.
  167. post.reload
  168. suggestions = self.class.normalized_suggestions(post)
  169. if suggestions.any?
  170. persist_workspace_state!(
  171. post: post,
  172. status: "ready",
  173. requested_by: requested_by,
  174. suggestions_count: suggestions.length,
  175. next_run_at: nil
  176. )
  177. return
  178. end
  # Generation produced nothing: either wait for profile history to be built, or fail.
  179. if retryable_profile_incomplete_block?(post: post, comment_result: comment_result)
  180. retry_result = schedule_build_history_retry!(
  181. account: account,
  182. profile: profile,
  183. post: post,
  184. requested_by: requested_by,
  185. history_reason_code: post.metadata.dig("comment_generation_policy", "history_reason_code").to_s
  186. )
  187. persist_workspace_state!(
  188. post: post,
  189. status: "waiting_build_history",
  190. requested_by: requested_by,
  191. next_run_at: parse_time(retry_result[:next_run_at]),
  192. last_error: retry_result[:queued] ? nil : retry_result[:reason].to_s
  193. )
  194. return
  195. end
  196. blocked_reason = post.metadata.dig("comment_generation_policy", "blocked_reason").to_s
  197. reason_code = post.metadata.dig("comment_generation_policy", "blocked_reason_code").to_s
  198. persist_workspace_state!(
  199. post: post,
  200. status: "failed",
  201. requested_by: requested_by,
  202. next_run_at: nil,
  203. last_error: blocked_reason.presence || reason_code.presence || "comment_generation_failed"
  204. )
  205. rescue StandardError => e
  # post is nil when the failure happened before the post was loaded.
  # NOTE(review): an error raised by reload here would replace the original error.
  206. post&.reload
  207. persist_workspace_state!(
  208. post: post,
  209. status: "failed",
  210. requested_by: requested_by,
  211. next_run_at: nil,
  212. last_error: "#{e.class}: #{e.message}"
  213. ) if post&.persisted?
  214. raise
  215. end
  216. private
  # Parses a value into a time using Time.zone. Blank values and anything that
  # fails to parse yield nil.
  #
  # NOTE: this `def self.` method sits below `private`, which does not apply
  # to singleton methods, so it stays publicly callable on the class.
  def self.parse_time(value)
    text = value.to_s
    text.blank? ? nil : Time.zone.parse(text)
  rescue StandardError
    nil
  end

  # Instance-level convenience wrapper around the class-level parser.
  def parse_time(value)
    self.class.parse_time(value)
  end
  # Reads analysis["comment_suggestions"] from the post and returns up to 8
  # unique, stripped, non-blank strings. Returns [] when the analysis is not a
  # Hash or anything raises.
  #
  # @return [Array<String>]
  def self.normalized_suggestions(post)
    raw_suggestions = post.analysis.is_a?(Hash) ? post.analysis["comment_suggestions"] : nil

    cleaned = Array(raw_suggestions).filter_map do |entry|
      text = entry.to_s.strip
      text unless text.blank?
    end

    cleaned.uniq.first(8)
  rescue StandardError
    []
  end
  # Writes the outcome of a run into metadata["workspace_actions"] on the post,
  # under a row lock. Clears lock_until, stamps updated_at/finished_at, and
  # stamps last_ready_at when the status is "ready".
  #
  # Best-effort: a failure to persist is logged and swallowed so that state
  # bookkeeping can never break the job itself.
  #
  # @param post [InstagramProfilePost]
  # @param status [#to_s] new workspace status
  # @param requested_by [#to_s] falls back to the stored requester, then "workspace"
  # @param next_run_at [Time, nil] stored as ISO8601, or nil to clear
  # @param last_error [#to_s, nil] stored when non-blank, otherwise cleared
  # @param suggestions_count [Integer, nil] stored only when present
  # @return [Object, nil] result of the locked update, or nil when persisting failed
  def persist_workspace_state!(post:, status:, requested_by:, next_run_at:, last_error: nil, suggestions_count: nil)
    post.with_lock do
      metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
      state = metadata["workspace_actions"].is_a?(Hash) ? metadata["workspace_actions"].deep_dup : {}

      # One timestamp for every field written in this call keeps them consistent.
      stamp = Time.current.iso8601(3)

      state["status"] = status.to_s
      state["requested_by"] = requested_by.to_s.presence || state["requested_by"].to_s.presence || "workspace"
      state["updated_at"] = stamp
      state["finished_at"] = stamp
      state["lock_until"] = nil
      state["last_error"] = last_error.to_s.presence
      state["next_run_at"] = next_run_at&.iso8601(3)
      state["suggestions_count"] = suggestions_count.to_i if suggestions_count.present?
      state["last_ready_at"] = stamp if status.to_s == "ready"

      metadata["workspace_actions"] = state
      post.update!(metadata: metadata)
    end
  rescue StandardError => e
    # Still best-effort, but no longer silent: leave a trace for debugging.
    Rails.logger.warn(
      "[WorkspaceProcessActionsTodoPostJob] failed to persist workspace state " \
      "(status=#{status}) for profile_post_id=#{post&.id}: #{e.class}: #{e.message}"
    )
    nil
  end
  # Flags the post's workspace state as "running" under a row lock and sets
  # lock_until to RUNNING_LOCK_SECONDS from now. Errors propagate to the caller.
  def mark_running!(post:, requested_by:)
    post.with_lock do
      post_metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
      existing_state = post_metadata["workspace_actions"]
      run_state = existing_state.is_a?(Hash) ? existing_state.deep_dup : {}

      started = Time.current
      started_stamp = started.iso8601(3)

      run_state.merge!(
        "status" => "running",
        "requested_by" => requested_by.to_s.presence || "workspace",
        "started_at" => started_stamp,
        "updated_at" => started_stamp,
        "lock_until" => (started + RUNNING_LOCK_SECONDS.seconds).iso8601(3),
        "last_error" => nil
      )

      post_metadata["workspace_actions"] = run_state
      post.update!(metadata: post_metadata)
    end
  end
  # Re-enqueues this job for a later run and records the resulting state.
  #
  # The retry time is +wait_until+ when it is a Time; otherwise it defaults to
  # POST_RETRY_WAIT_MINUTES from now. The job is enqueued through the
  # class-level +enqueue_if_needed!+ with +force: true+, then the workspace
  # state is persisted with the given +status+.
  #
  # @param last_error [Object] stored only when the enqueue did not succeed;
  #   if blank, the enqueue result's +:reason+ is stored instead
  # @return [Hash] the result returned by +enqueue_if_needed!+
  def schedule_retry!(account:, profile:, post:, requested_by:, wait_until:, status:, last_error:)
    retry_time = wait_until
    retry_time = Time.current + POST_RETRY_WAIT_MINUTES.minutes unless retry_time.is_a?(Time)

    enqueue_result = self.class.enqueue_if_needed!(
      account: account,
      profile: profile,
      post: post,
      requested_by: "workspace_retry:#{requested_by}",
      wait_until: retry_time,
      force: true
    )

    # A successful enqueue clears the stored error; otherwise keep the most
    # specific message available.
    error_to_store =
      if enqueue_result[:enqueued]
        nil
      else
        last_error.presence || enqueue_result[:reason].to_s
      end

    persist_workspace_state!(
      post: post,
      status: status,
      requested_by: requested_by,
      next_run_at: retry_time,
      last_error: error_to_store
    )

    enqueue_result
  end
  # Enqueues DownloadInstagramProfilePostMediaJob for the post and records the
  # job id and queue time under <tt>metadata["workspace_actions"]</tt>.
  #
  # NOTE(review): the early return only fires when a job id was recorded AND
  # media is attached; attached media without a recorded job id still enqueues
  # a download. Confirm this is intended.
  # NOTE(review): the job is enqueued before the metadata update, so a failing
  # update reports +queued: false+ although a job may already be queued.
  #
  # @return [Hash] +{ queued: true, job_id: }+ on success,
  #   +{ queued: false, reason: "already_downloaded" }+ on the early return, or
  #   +{ queued: false, reason: "media_download_enqueue_failed", ... }+ with the
  #   error class and message when a StandardError is raised
  def queue_media_download!(account:, profile:, post:)
    current_meta = post.metadata
    actions = current_meta.is_a?(Hash) ? current_meta["workspace_actions"] : nil
    recorded_job_id = actions.is_a?(Hash) ? actions["media_download_job_id"].to_s : ""

    return { queued: false, reason: "already_downloaded" } if recorded_job_id.present? && post.media.attached?

    download_job = DownloadInstagramProfilePostMediaJob.perform_later(
      instagram_account_id: account.id,
      instagram_profile_id: profile.id,
      instagram_profile_post_id: post.id,
      trigger_analysis: false
    )

    post.with_lock do
      fresh_meta = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
      fresh_actions = fresh_meta["workspace_actions"]
      state = fresh_actions.is_a?(Hash) ? fresh_actions.deep_dup : {}
      state["media_download_job_id"] = download_job.job_id
      state["media_download_queued_at"] = Time.current.iso8601(3)
      fresh_meta["workspace_actions"] = state
      post.update!(metadata: fresh_meta)
    end

    { queued: true, job_id: download_job.job_id }
  rescue StandardError => e
    {
      queued: false,
      reason: "media_download_enqueue_failed",
      error_class: e.class.name,
      error_message: e.message.to_s
    }
  end
  # Enqueues AnalyzeInstagramProfilePostJob with comment generation disabled
  # and records the job id and queue time under
  # <tt>metadata["workspace_actions"]</tt>.
  #
  # Nothing is enqueued when an analysis was queued less than ten minutes ago
  # and +post_analysis_running?+ still reports the post as running.
  #
  # @return [Hash] +{ queued: true, job_id: }+ on success,
  #   +{ queued: false, reason: "post_analysis_already_running" }+ when skipped,
  #   or +{ queued: false, reason: "post_analysis_enqueue_failed", ... }+ with
  #   the error class and message when a StandardError is raised
  def queue_post_analysis!(account:, profile:, post:)
    current_meta = post.metadata
    actions = current_meta.is_a?(Hash) ? current_meta["workspace_actions"] : nil
    raw_queued_at = actions.is_a?(Hash) ? actions["post_analysis_queued_at"] : nil
    last_queued_at = parse_time(raw_queued_at)

    recently_queued = last_queued_at.present? && last_queued_at > 10.minutes.ago
    return { queued: false, reason: "post_analysis_already_running" } if recently_queued && post_analysis_running?(post)

    analysis_flags = {
      analyze_visual: true,
      analyze_faces: true,
      run_ocr: true,
      run_video: true,
      run_metadata: true,
      generate_comments: false,
      enforce_comment_evidence_policy: false,
      retry_on_incomplete_profile: false
    }
    analysis_job = AnalyzeInstagramProfilePostJob.perform_later(
      instagram_account_id: account.id,
      instagram_profile_id: profile.id,
      instagram_profile_post_id: post.id,
      task_flags: analysis_flags
    )

    post.with_lock do
      fresh_meta = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
      fresh_actions = fresh_meta["workspace_actions"]
      state = fresh_actions.is_a?(Hash) ? fresh_actions.deep_dup : {}
      state["post_analysis_job_id"] = analysis_job.job_id
      state["post_analysis_queued_at"] = Time.current.iso8601(3)
      fresh_meta["workspace_actions"] = state
      post.update!(metadata: fresh_meta)
    end

    { queued: true, job_id: analysis_job.job_id }
  rescue StandardError => e
    {
      queued: false,
      reason: "post_analysis_enqueue_failed",
      error_class: e.class.name,
      error_message: e.message.to_s
    }
  end
  # Registers a build-history run that resumes this job afterwards, bounded by
  # PROFILE_RETRY_MAX_ATTEMPTS. Everything runs inside +post.with_lock+.
  #
  # On acceptance the attempt counter, reason code, job/action-log identifiers
  # and next run time are written to <tt>metadata["workspace_actions"]</tt>.
  #
  # @param history_reason_code [#to_s] stored as +profile_retry_reason_code+
  # @return [Hash] +queued: true+ with reason "build_history_fallback_registered"
  #   when accepted; +queued: false+ with reason "retry_attempts_exhausted",
  #   the enqueue's own reason (or "build_history_enqueue_failed"), or
  #   "retry_enqueue_failed" plus error details when a StandardError is raised
  def schedule_build_history_retry!(account:, profile:, post:, requested_by:, history_reason_code:)
    post.with_lock do
      metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
      stored_state = metadata["workspace_actions"]
      state = stored_state.is_a?(Hash) ? stored_state.deep_dup : {}
      attempts = state["profile_retry_attempts"].to_i

      if attempts >= PROFILE_RETRY_MAX_ATTEMPTS
        exhausted = { queued: false, reason: "retry_attempts_exhausted", next_run_at: nil }
        next exhausted
      end

      resume_kwargs = {
        instagram_account_id: account.id,
        instagram_profile_id: profile.id,
        instagram_profile_post_id: post.id,
        requested_by: "workspace_history_retry:#{requested_by}"
      }
      resume_result = BuildInstagramProfileHistoryJob.enqueue_with_resume_if_needed!(
        account: account,
        profile: profile,
        trigger_source: "workspace_actions_queue",
        requested_by: self.class.name,
        resume_job: { job_class: self.class, job_kwargs: resume_kwargs }
      )

      accepted = ActiveModel::Type::Boolean.new.cast(resume_result[:accepted])
      unless accepted
        rejected = {
          queued: false,
          reason: resume_result[:reason].to_s.presence || "build_history_enqueue_failed",
          next_run_at: nil
        }
        next rejected
      end

      action_log_id = resume_result[:action_log_id]
      state["profile_retry_attempts"] = attempts + 1
      state["profile_retry_reason_code"] = history_reason_code.to_s
      state["build_history_action_log_id"] = action_log_id.to_i if action_log_id.present?
      state["build_history_job_id"] = resume_result[:job_id].to_s.presence
      state["next_run_at"] = resume_result[:next_run_at].to_s.presence
      state["updated_at"] = Time.current.iso8601(3)
      metadata["workspace_actions"] = state
      post.update!(metadata: metadata)

      {
        queued: true,
        reason: "build_history_fallback_registered",
        next_run_at: resume_result[:next_run_at],
        action_log_id: resume_result[:action_log_id],
        job_id: resume_result[:job_id].to_s
      }
    end
  rescue StandardError => e
    {
      queued: false,
      reason: "retry_enqueue_failed",
      next_run_at: nil,
      error_class: e.class.name,
      error_message: e.message.to_s
    }
  end
  # Decides whether a blocked comment result can be retried once more profile
  # history is available.
  #
  # True only when the result is blocked with reason code
  # "missing_required_evidence", the post's "comment_generation_policy" is a
  # Hash whose "history_ready" is not truthy, and its "history_reason_code" is
  # listed in PROFILE_INCOMPLETE_REASON_CODES.
  #
  # @return [Boolean] false on any StandardError
  def retryable_profile_incomplete_block?(post:, comment_result:)
    caster = ActiveModel::Type::Boolean.new
    return false unless caster.cast(comment_result[:blocked])
    return false if comment_result[:reason_code].to_s != "missing_required_evidence"

    meta = post.metadata
    policy = meta.is_a?(Hash) ? meta["comment_generation_policy"] : nil
    return false unless policy.is_a?(Hash)
    return false if caster.cast(policy["history_ready"])

    reason_code = policy["history_reason_code"].to_s
    PROFILE_INCOMPLETE_REASON_CODES.include?(reason_code)
  rescue StandardError
    false
  end
  # Reports whether analysis of the post is in flight: either +ai_status+ is
  # "pending"/"running", or <tt>metadata["ai_pipeline"]["status"]</tt> is
  # "running".
  #
  # @return [Boolean] false on any StandardError
  def post_analysis_running?(post)
    return true if %w[pending running].include?(post.ai_status.to_s)

    meta = post.metadata
    pipeline = meta.is_a?(Hash) ? meta["ai_pipeline"] : nil
    pipeline_status = pipeline.is_a?(Hash) ? pipeline["status"] : nil
    pipeline_status.to_s == "running"
  rescue StandardError
    false
  end
  # True when the post's +ai_status+ is "analyzed" and +analyzed_at+ is set.
  #
  # @return [Boolean]
  def post_analyzed?(post)
    return false unless post.ai_status.to_s == "analyzed"

    post.analyzed_at.present?
  end
  # Casts <tt>metadata["deleted_from_source"]</tt> with
  # ActiveModel::Type::Boolean; a non-Hash metadata is treated as empty.
  #
  # @return [Boolean, nil] the cast flag (nil when the key is absent)
  def post_deleted_from_source?(post)
    meta = post.metadata
    flag = meta.is_a?(Hash) ? meta["deleted_from_source"] : nil
    ActiveModel::Type::Boolean.new.cast(flag)
  end
  # Treats a post as user-created unless its metadata marks it as a story:
  # "post_kind" or "product_type" equal to "story" (case-insensitive), or a
  # truthy "is_story" flag.
  #
  # @return [Boolean] false on any StandardError
  def user_created_post?(post)
    meta = post.metadata.is_a?(Hash) ? post.metadata : {}

    kinds = [meta["post_kind"], meta["product_type"]].map { |entry| entry.to_s.downcase }
    return false if kinds.include?("story")

    !ActiveModel::Type::Boolean.new.cast(meta["is_story"])
  rescue StandardError
    false
  end
  # Enqueues GenerateProfilePostPreviewImageJob for a post whose attached media
  # has a "video/" content type and which has no preview image yet.
  #
  # The enqueue runs inside a +Rails.cache.fetch+ block keyed per post with a
  # 30-minute expiry, so the job is only enqueued on a cache miss.
  #
  # @return [Object, nil] the cached/block value, or nil when a guard returns
  #   early or any StandardError is raised
  def ensure_video_preview_generation!(post:)
    media = post.media
    return unless media.attached?

    content_type = media.blob&.content_type.to_s
    return unless content_type.start_with?("video/")
    return if post.preview_image.attached?

    Rails.cache.fetch("workspace_actions:preview:#{post.id}", expires_in: 30.minutes) do
      GenerateProfilePostPreviewImageJob.perform_later(instagram_profile_post_id: post.id)
      true
    end
  rescue StandardError
    nil
  end
  451. end

app/mailers/application_mailer.rb

0.0% lines covered

100.0% branches covered

4 relevant lines. 0 lines covered and 4 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Base mailer for the application: sets the default sender address and the
  # shared "mailer" layout.
  class ApplicationMailer < ActionMailer::Base
    default(from: "from@example.com")
    layout("mailer")
  end

app/models/active_storage_ingestion.rb

0.0% lines covered

100.0% branches covered

70 relevant lines. 0 lines covered and 70 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # One row per ActiveStorage attachment, capturing blob details together with
  # the account/profile and background-job context it was created under.
  class ActiveStorageIngestion < ApplicationRecord
    belongs_to :attachment, class_name: "ActiveStorage::Attachment", foreign_key: :active_storage_attachment_id
    belongs_to :blob, class_name: "ActiveStorage::Blob", foreign_key: :active_storage_blob_id
    belongs_to :instagram_account, optional: true
    belongs_to :instagram_profile, optional: true

    validates :active_storage_attachment_id, uniqueness: true
    validates :attachment_name, :blob_filename, :blob_byte_size, presence: true

    scope :recent_first, -> { order(created_at: :desc, id: :desc) }

    after_commit :broadcast_live_updates

    class << self
      # Creates the ingestion row for +attachment+ unless one already exists.
      #
      # Account and profile ids come from the attached record when it exposes
      # them, otherwise from +Current.job_context+. Any StandardError is
      # logged as a warning and swallowed.
      #
      # @param attachment [ActiveStorage::Attachment]
      # @return [ActiveStorageIngestion, nil] the created row, or nil when a
      #   row already exists or the capture failed
      def record_from_attachment!(attachment:)
        return if exists?(active_storage_attachment_id: attachment.id)

        blob = attachment.blob
        record_context = extract_record_context(record: attachment.record)
        job_context = Current.job_context

        blob_details = {
          service_name: blob.service_name,
          checksum: blob.checksum,
          content_type: blob.content_type,
          blob_created_at: blob.created_at&.iso8601
        }

        create!(
          active_storage_attachment_id: attachment.id,
          active_storage_blob_id: blob.id,
          attachment_name: attachment.name.to_s,
          record_type: attachment.record_type.to_s,
          record_id: attachment.record_id,
          blob_filename: blob.filename.to_s,
          blob_content_type: blob.content_type.to_s.presence,
          blob_byte_size: blob.byte_size.to_i,
          instagram_account_id: record_context[:instagram_account_id] || job_context[:instagram_account_id],
          instagram_profile_id: record_context[:instagram_profile_id] || job_context[:instagram_profile_id],
          created_by_job_class: job_context[:job_class],
          created_by_active_job_id: job_context[:active_job_id],
          created_by_provider_job_id: job_context[:provider_job_id],
          queue_name: job_context[:queue_name],
          metadata: blob_details
        )
      rescue StandardError => e
        Rails.logger.warn("[storage.ingestion] capture failed: #{e.class}: #{e.message}")
        nil
      end

      # Derives account and profile ids from an arbitrary attached record by
      # probing for id attributes first, then associations; an InstagramProfile
      # record supplies its own id as the profile id.
      #
      # @param record [Object, nil]
      # @return [Hash] +{ instagram_account_id:, instagram_profile_id: }+, or
      #   +{}+ when +record+ is nil or a StandardError is raised
      def extract_record_context(record:)
        return {} unless record

        account_id = nil
        if record.respond_to?(:instagram_account_id)
          account_id = record.instagram_account_id
        elsif record.respond_to?(:instagram_account) && record.instagram_account.respond_to?(:id)
          account_id = record.instagram_account.id
        end

        profile_id = nil
        if record.respond_to?(:instagram_profile_id)
          profile_id = record.instagram_profile_id
        elsif record.respond_to?(:instagram_profile) && record.instagram_profile.respond_to?(:id)
          profile_id = record.instagram_profile.id
        elsif record.is_a?(InstagramProfile)
          profile_id = record.id
        end

        { instagram_account_id: account_id, instagram_profile_id: profile_id }
      rescue StandardError
        {}
      end
    end

    private

    # Announces the change on the "storage_ingestions_changed" topic.
    def broadcast_live_updates
      Ops::LiveUpdateBroadcaster.broadcast!(
        topic: "storage_ingestions_changed",
        account_id: instagram_account_id,
        payload: { ingestion_id: id },
        throttle_key: "storage_ingestions_changed"
      )
    end
  end

app/models/ai_analysis.rb

0.0% lines covered

100.0% branches covered

23 relevant lines. 0 lines covered and 23 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Stored result of an AI analysis run against a polymorphic +analyzable+.
  # An analysis may point at the analysis it was cached from.
  class AiAnalysis < ApplicationRecord
    # Associations
    belongs_to :instagram_account
    belongs_to :analyzable, polymorphic: true
    belongs_to :cached_from_analysis, class_name: "AiAnalysis", foreign_key: :cached_from_ai_analysis_id, optional: true
    has_many :cached_copies, class_name: "AiAnalysis", foreign_key: :cached_from_ai_analysis_id, dependent: :nullify
    has_one :instagram_profile_insight, dependent: :destroy
    has_one :instagram_profile_message_strategy, dependent: :destroy
    has_many :instagram_profile_signal_evidences, dependent: :destroy
    has_one :instagram_post_insight, dependent: :destroy

    # Prompt and raw response are stored encrypted.
    encrypts :prompt
    encrypts :response_text

    # Validations
    validates :purpose, presence: true, inclusion: { in: %w[profile post] }
    validates :provider, presence: true
    validates :status, presence: true

    # Scopes
    scope :recent_first, -> { order(created_at: :desc, id: :desc) }
    scope :succeeded, -> { where(status: "succeeded") }

    # Succeeded analyses with a non-null +analysis+ for the given purpose and
    # media fingerprint, newest first.
    scope :reusable_for, lambda { |purpose:, media_fingerprint:|
      succeeded
        .where(purpose: purpose, media_fingerprint: media_fingerprint)
        .where.not(analysis: nil)
        .recent_first
    }
  end

app/models/ai_api_call.rb

0.0% lines covered

100.0% branches covered

12 relevant lines. 0 lines covered and 12 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Log entry for a single call to an AI provider.
  class AiApiCall < ApplicationRecord
    # Allowed values for +category+.
    CATEGORIES = %w[image_analysis video_analysis report_generation text_generation healthcheck other].freeze
    # Allowed values for +status+.
    STATUSES = %w[succeeded failed].freeze

    belongs_to :instagram_account, optional: true

    validates :provider, presence: true
    validates :operation, presence: true
    validates :category, presence: true, inclusion: { in: CATEGORIES }
    validates :status, presence: true, inclusion: { in: STATUSES }
    validates :occurred_at, presence: true

    scope :recent_first, -> { order(occurred_at: :desc, id: :desc) }
    # Calls whose +occurred_at+ matches +range+.
    scope :within, lambda { |range| where(occurred_at: range) }
  end

app/models/ai_provider_setting.rb

0.0% lines covered

100.0% branches covered

43 relevant lines. 0 lines covered and 43 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Per-provider AI configuration: enabled flag, priority, encrypted API key
  # and a free-form +config+ hash.
  class AiProviderSetting < ApplicationRecord
    SUPPORTED_PROVIDERS = %w[local].freeze

    encrypts :api_key

    validates :provider, presence: true, inclusion: { in: SUPPORTED_PROVIDERS }
    validates :provider, uniqueness: true
    validates :priority, numericality: { greater_than_or_equal_to: 0 }

    scope :enabled_first, -> { order(enabled: :desc, priority: :asc, provider: :asc) }

    # @return [Hash] +config+ with string keys, or +{}+ when it is not a Hash
    def config_hash
      raw = config
      raw.is_a?(Hash) ? raw.stringify_keys : {}
    end

    # @param key [#to_s]
    # @return [Object, nil] the config entry stored under +key+
    def config_value(key)
      config_hash[key.to_s]
    end

    # Stores +value+ under +key+, or removes the key when +value+ is blank,
    # then assigns the merged hash back to +config+ (not saved here).
    def set_config_value(key, value)
      settings = config_hash
      name = key.to_s

      if value.blank?
        settings.delete(name)
      else
        settings[name] = value
      end

      self.config = settings
    end

    # @return [String] human-readable provider name
    def display_name
      return "Local AI Microservice" if provider == "local"

      provider.to_s.humanize
    end

    # @return [String] the API key, or "" when it is blank
    def effective_api_key
      api_key.to_s.presence || ""
    end

    # @return [String] the configured "model", or "" when it is blank
    def effective_model
      config_value("model").to_s.presence || ""
    end

    # @return [Boolean] whether a non-blank API key is available
    def api_key_present?
      effective_api_key.present?
    end
  end

app/models/app_issue.rb

0.0% lines covered

100.0% branches covered

54 relevant lines. 0 lines covered and 54 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # An application issue identified by a unique fingerprint, with a status
  # lifecycle of open, pending and resolved.
  class AppIssue < ApplicationRecord
    STATUSES = %w[open pending resolved].freeze
    SEVERITIES = %w[info warn error critical].freeze

    belongs_to :instagram_account, optional: true
    belongs_to :instagram_profile, optional: true
    belongs_to :background_job_failure, optional: true

    validates :fingerprint, presence: true, uniqueness: true
    validates :issue_type, :source, :title, presence: true
    validates :status, inclusion: { in: STATUSES }
    validates :severity, inclusion: { in: SEVERITIES }
    validates :first_seen_at, :last_seen_at, presence: true

    scope :recent_first, -> { order(last_seen_at: :desc, id: :desc) }
    scope :active, -> { where.not(status: "resolved") }

    after_commit :broadcast_live_updates

    # @return [Boolean] whether a linked job failure exists and is retryable
    def retryable?
      return false if background_job_failure.blank?

      background_job_failure.retryable?
    end

    # Reopens the issue and clears +resolved_at+.
    # @param notes [String, nil] replaces +resolution_notes+ when present
    def mark_open!(notes: nil)
      transition_to!("open", resolved_at: nil, notes: notes)
    end

    # Marks the issue pending and clears +resolved_at+.
    # @param notes [String, nil] replaces +resolution_notes+ when present
    def mark_pending!(notes: nil)
      transition_to!("pending", resolved_at: nil, notes: notes)
    end

    # Marks the issue resolved as of the current time.
    # @param notes [String, nil] replaces +resolution_notes+ when present
    def mark_resolved!(notes: nil)
      transition_to!("resolved", resolved_at: Time.current, notes: notes)
    end

    private

    # Shared status update used by the +mark_*!+ methods; keeps the existing
    # +resolution_notes+ when +notes+ is blank.
    def transition_to!(new_status, resolved_at:, notes:)
      update!(
        status: new_status,
        resolved_at: resolved_at,
        resolution_notes: notes.presence || resolution_notes
      )
    end

    # Announces the change on the issues and dashboard-metrics topics.
    def broadcast_live_updates
      Ops::LiveUpdateBroadcaster.broadcast!(
        topic: "issues_changed",
        account_id: instagram_account_id,
        payload: { issue_id: id, status: status },
        throttle_key: "issues_changed"
      )
      Ops::LiveUpdateBroadcaster.broadcast!(
        topic: "dashboard_metrics_changed",
        account_id: instagram_account_id,
        payload: { source: "app_issue" },
        throttle_key: "dashboard_metrics_changed"
      )
    end
  end

app/models/application_record.rb

0.0% lines covered

100.0% branches covered

3 relevant lines. 0 lines covered and 3 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Abstract base class that the application's models inherit from.
  class ApplicationRecord < ActiveRecord::Base
    primary_abstract_class
  end

app/models/background_job_failure.rb

0.0% lines covered

100.0% branches covered

38 relevant lines. 0 lines covered and 38 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Record of a failed background job, classified by +failure_kind+.
  class BackgroundJobFailure < ApplicationRecord
    FAILURE_KINDS = %w[authentication transient runtime].freeze

    belongs_to :instagram_account, optional: true
    belongs_to :instagram_profile, optional: true
    has_many :app_issues, dependent: :nullify

    validates :active_job_id, presence: true
    validates :job_class, presence: true
    validates :error_class, presence: true
    validates :error_message, presence: true
    validates :occurred_at, presence: true
    validates :failure_kind, inclusion: { in: FAILURE_KINDS }

    scope :recent_first, -> { order(occurred_at: :desc, id: :desc) }

    after_commit :broadcast_live_updates

    # @return [Boolean] whether the failure kind is "authentication"
    def auth_failure?
      failure_kind == "authentication"
    end

    # @return [Boolean] retryable and not an authentication failure
    def retryable_now?
      return false unless retryable?

      !auth_failure?
    end

    # @return [Boolean] true only when the +retryable+ attribute is exactly true
    def retryable?
      read_attribute(:retryable) == true
    end

    private

    # Announces the change on the job-failures and jobs topics.
    def broadcast_live_updates
      Ops::LiveUpdateBroadcaster.broadcast!(
        topic: "job_failures_changed",
        account_id: instagram_account_id,
        payload: { failure_id: id, failure_kind: failure_kind },
        throttle_key: "job_failures_changed"
      )
      Ops::LiveUpdateBroadcaster.broadcast!(
        topic: "jobs_changed",
        account_id: instagram_account_id,
        payload: { source: "background_job_failure", failure_id: id },
        throttle_key: "jobs_changed"
      )
    end
  end

app/models/concerns/active_storage_ingestion_tracking.rb

85.71% lines covered

100.0% branches covered

7 relevant lines. 6 lines covered and 1 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Concern that records an ActiveStorageIngestion row after the including
  # model's create is committed.
  module ActiveStorageIngestionTracking
    extend ActiveSupport::Concern

    included { after_create_commit :capture_storage_ingestion_row }

    private

    # Passes the including record to ActiveStorageIngestion as the attachment.
    def capture_storage_ingestion_row
      ActiveStorageIngestion.record_from_attachment!(attachment: self)
    end
  end

app/models/conversation_peer.rb

0.0% lines covered

100.0% branches covered

4 relevant lines. 0 lines covered and 4 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # A conversation peer belonging to an Instagram account; a username is
  # required.
  class ConversationPeer < ApplicationRecord
    belongs_to :instagram_account

    validates :username, presence: true
  end

app/models/current.rb

0.0% lines covered

100.0% branches covered

18 relevant lines. 0 lines covered and 18 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Current attributes describing the background job in progress and the
  # Instagram account/profile it is working on.
  class Current < ActiveSupport::CurrentAttributes
    attribute :active_job_id,
              :provider_job_id,
              :job_class,
              :queue_name,
              :instagram_account_id,
              :instagram_profile_id

    # @return [Hash{Symbol => Object}] snapshot of all six attributes, keyed by
    #   attribute name in declaration order
    def job_context
      %i[
        active_job_id
        provider_job_id
        job_class
        queue_name
        instagram_account_id
        instagram_profile_id
      ].to_h { |name| [name, public_send(name)] }
    end
  end

app/models/instagram_account.rb

0.0% lines covered

100.0% branches covered

109 relevant lines. 0 lines covered and 109 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # An Instagram account together with its persisted browser session state
  # (cookies, local/session storage, user agent and an auth snapshot), each
  # stored as a JSON string column.
  class InstagramAccount < ApplicationRecord
    CONTINUOUS_PROCESSING_STATES = %w[idle running paused].freeze

    has_many :recipients, dependent: :destroy
    has_many :conversation_peers, dependent: :destroy
    has_many :instagram_profiles, dependent: :destroy
    has_many :instagram_messages, dependent: :destroy
    has_many :sync_runs, dependent: :destroy
    has_many :instagram_profile_analyses, through: :instagram_profiles
    has_many :instagram_posts, dependent: :destroy
    has_many :instagram_profile_posts, dependent: :destroy
    has_many :ai_analyses, dependent: :destroy
    has_many :ai_api_calls, dependent: :destroy
    has_many :instagram_profile_action_logs, dependent: :destroy
    has_many :instagram_profile_insights, dependent: :destroy
    has_many :instagram_profile_message_strategies, dependent: :destroy
    has_many :instagram_profile_signal_evidences, dependent: :destroy
    has_many :instagram_post_insights, dependent: :destroy
    has_many :instagram_post_entities, dependent: :destroy
    has_many :instagram_profile_history_chunks, dependent: :destroy
    has_many :instagram_stories, dependent: :destroy
    has_many :instagram_story_people, dependent: :destroy
    has_many :app_issues, dependent: :nullify
    has_many :active_storage_ingestions, dependent: :nullify

    # Session columns are encrypted only when all three Active Record
    # encryption settings are configured.
    encryption = Rails.application.config.active_record.encryption
    if encryption.primary_key.present? &&
       encryption.deterministic_key.present? &&
       encryption.key_derivation_salt.present?
      encrypts :cookies_json
      encrypts :local_storage_json
      encrypts :session_storage_json
      encrypts :auth_snapshot_json
    end

    validates :username, presence: true
    validates :continuous_processing_state, inclusion: { in: CONTINUOUS_PROCESSING_STATES }, allow_nil: true

    scope :continuous_processing_enabled, -> { where(continuous_processing_enabled: true) }

    # @return [Boolean] whether a retry-after time is set and still in the future
    def continuous_processing_backoff_active?
      continuous_processing_retry_after_at.present? && continuous_processing_retry_after_at > Time.current
    end

    # @return [Array] parsed cookies; +[]+ when blank, malformed, or not a JSON array
    def cookies
      parse_json_array(cookies_json)
    end

    def cookies=(raw_cookies)
      self.cookies_json = Array(raw_cookies).to_json
    end

    # @return [Array] parsed local storage entries; +[]+ when blank, malformed,
    #   or not a JSON array
    def local_storage
      parse_json_array(local_storage_json)
    end

    def local_storage=(entries)
      self.local_storage_json = Array(entries).to_json
    end

    # @return [Array] parsed session storage entries; +[]+ when blank,
    #   malformed, or not a JSON array
    def session_storage
      parse_json_array(session_storage_json)
    end

    def session_storage=(entries)
      self.session_storage_json = Array(entries).to_json
    end

    # @return [Hash] parsed auth snapshot; +{}+ when blank, malformed, or not
    #   a JSON object
    def auth_snapshot
      parse_json_as(auth_snapshot_json, Hash)
    end

    def auth_snapshot=(value)
      self.auth_snapshot_json = value.to_h.to_json
    end

    # @return [Hash] the full session state keyed by symbol
    def session_bundle
      {
        cookies: cookies,
        local_storage: local_storage,
        session_storage: session_storage,
        user_agent: user_agent,
        auth_snapshot: auth_snapshot
      }
    end

    # Assigns every session field from +bundle+ (string or symbol keys).
    def session_bundle=(bundle)
      payload = bundle.to_h.deep_symbolize_keys
      self.cookies = payload[:cookies]
      self.local_storage = payload[:local_storage]
      self.session_storage = payload[:session_storage]
      self.user_agent = payload[:user_agent].presence
      self.auth_snapshot = payload[:auth_snapshot] || {}
    end

    def sessionid_cookie_present?
      cookie_named_present?("sessionid")
    end

    def csrftoken_cookie_present?
      cookie_named_present?("csrftoken")
    end

    # @return [Boolean] login state is "authenticated" and a sessionid cookie exists
    def cookie_authenticated?
      login_state.to_s == "authenticated" && sessionid_cookie_present?
    end

    private

    # @return [Array] the parsed JSON array, or +[]+ (see +parse_json_as+)
    def parse_json_array(value)
      parse_json_as(value, Array)
    end

    # Parses +raw+ as JSON and returns it only when it is an instance of
    # +expected_class+. Valid JSON of another shape (for example +null+ or a
    # number) yields an empty instance, so callers that enumerate the result
    # never receive nil or a scalar.
    #
    # @param raw [String, nil] JSON text
    # @param expected_class [Class] Array or Hash
    # @return [Array, Hash] parsed value, or an empty +expected_class+ instance
    #   when +raw+ is blank, malformed, or of a different type
    def parse_json_as(raw, expected_class)
      return expected_class.new if raw.blank?

      parsed = JSON.parse(raw)
      parsed.is_a?(expected_class) ? parsed : expected_class.new
    rescue JSON::ParserError
      expected_class.new
    end

    # @return [Boolean] whether a Hash cookie with this name has a non-blank value
    def cookie_named_present?(name)
      target = name.to_s
      cookies.any? do |cookie|
        next false unless cookie.is_a?(Hash)

        cookie["name"].to_s == target && cookie["value"].to_s.present?
      end
    end
  end

app/models/instagram_message.rb

0.0% lines covered

100.0% branches covered

15 relevant lines. 0 lines covered and 15 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # A message tied to an Instagram account and profile, with status predicates
  # for "queued", "sent" and "failed".
  class InstagramMessage < ApplicationRecord
    belongs_to :instagram_account
    belongs_to :instagram_profile

    validates :body, presence: true

    scope :recent_first, -> { order(created_at: :desc) }

    # @return [Boolean] whether +status+ is "queued"
    def queued?
      status == "queued"
    end

    # @return [Boolean] whether +status+ is "sent"
    def sent?
      status == "sent"
    end

    # @return [Boolean] whether +status+ is "failed"
    def failed?
      status == "failed"
    end
  end

app/models/instagram_post.rb

0.0% lines covered

100.0% branches covered

25 relevant lines. 0 lines covered and 25 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # A detected Instagram post with one attached media file, AI analyses,
  # insights and entities.
  class InstagramPost < ApplicationRecord
    belongs_to :instagram_account
    belongs_to :instagram_profile, optional: true

    has_one_attached :media
    has_many :ai_analyses, as: :analyzable, dependent: :destroy
    has_many :instagram_post_insights, dependent: :destroy
    has_many :instagram_post_entities, dependent: :destroy

    validates :shortcode, presence: true
    validates :detected_at, presence: true
    validates :status, presence: true

    scope :recent_first, -> { order(detected_at: :desc, id: :desc) }

    after_commit :broadcast_posts_table_refresh

    # @return [String] post URL built from the client's base URL and +shortcode+
    def permalink
      base_url = Instagram::Client::INSTAGRAM_BASE_URL
      format("%s/p/%s/", base_url, shortcode)
    end

    private

    # Announces the change on the "posts_table_changed" topic.
    def broadcast_posts_table_refresh
      Ops::LiveUpdateBroadcaster.broadcast!(
        topic: "posts_table_changed",
        account_id: instagram_account_id,
        payload: { post_id: id },
        throttle_key: "posts_table_changed"
      )
    end
  end

app/models/instagram_post_entity.rb

0.0% lines covered

100.0% branches covered

8 relevant lines. 0 lines covered and 8 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # A typed entity value attached to a post insight.
  class InstagramPostEntity < ApplicationRecord
    belongs_to :instagram_account
    belongs_to :instagram_post
    belongs_to :instagram_post_insight

    validates :entity_type, presence: true
    validates :value, presence: true

    scope :recent_first, -> { order(created_at: :desc, id: :desc) }
  end

app/models/instagram_post_face.rb

0.0% lines covered

100.0% branches covered

6 relevant lines. 0 lines covered and 6 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # A face record on a profile post, optionally linked to a story person and
  # tagged with one of ROLES.
  class InstagramPostFace < ApplicationRecord
    ROLES = %w[primary_user secondary_person unknown].freeze

    belongs_to :instagram_profile_post
    belongs_to :instagram_story_person, optional: true

    validates :role, presence: true, inclusion: { in: ROLES }
  end

app/models/instagram_post_insight.rb

0.0% lines covered

100.0% branches covered

7 relevant lines. 0 lines covered and 7 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Insight record linking a post to the AI analysis that produced it; owns
  # the post entities attached to it.
  class InstagramPostInsight < ApplicationRecord
    belongs_to :instagram_account
    belongs_to :instagram_post
    belongs_to :ai_analysis

    has_many :instagram_post_entities, dependent: :destroy

    scope :recent_first, -> { order(created_at: :desc, id: :desc) }
  end

app/models/instagram_profile.rb

0.0% lines covered

100.0% branches covered

92 relevant lines. 0 lines covered and 92 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. class InstagramProfile < ApplicationRecord
  2. belongs_to :instagram_account
  3. has_many :instagram_messages, dependent: :destroy
  4. has_many :instagram_profile_events, dependent: :destroy
  5. has_many :instagram_profile_analyses, dependent: :destroy
  6. has_many :instagram_profile_action_logs, dependent: :destroy
  7. has_many :instagram_profile_posts, dependent: :destroy
  8. has_many :instagram_post_faces, through: :instagram_profile_posts
  9. has_many :instagram_profile_post_comments, dependent: :destroy
  10. has_many :instagram_profile_insights, dependent: :destroy
  11. has_many :instagram_profile_message_strategies, dependent: :destroy
  12. has_many :instagram_profile_signal_evidences, dependent: :destroy
  13. has_many :instagram_profile_history_chunks, dependent: :destroy
  14. has_many :instagram_stories, dependent: :destroy
  15. has_many :instagram_story_people, dependent: :destroy
  16. has_many :ai_analyses, as: :analyzable, dependent: :destroy
  17. has_many :instagram_profile_taggings, dependent: :destroy
  18. has_many :profile_tags, through: :instagram_profile_taggings
  19. has_many :app_issues, dependent: :nullify
  20. has_many :active_storage_ingestions, dependent: :nullify
  21. has_one :instagram_profile_behavior_profile, dependent: :destroy
  22. has_one_attached :avatar
  23. validates :username, presence: true
  24. after_commit :broadcast_profiles_table_refresh
  25. def mutual?
  26. following && follows_you
  27. end
  28. def display_label
  29. display_name.presence || username
  30. end
  31. def recompute_last_active!
  32. self.last_active_at = [ last_story_seen_at, last_post_at ].compact.max
  33. end
  34. def story_reply_allowed?
  35. story_interaction_state.to_s == "reply_available"
  36. end
  37. def story_reply_retry_pending?
  38. story_interaction_state.to_s == "unavailable" &&
  39. story_interaction_retry_after_at.present? &&
  40. story_interaction_retry_after_at > Time.current
  41. end
  42. def dm_allowed?
  43. dm_interaction_state.to_s == "messageable" || can_message == true
  44. end
  45. def dm_retry_pending?
  46. dm_interaction_state.to_s == "unavailable" &&
  47. dm_interaction_retry_after_at.present? &&
  48. dm_interaction_retry_after_at > Time.current
  49. end
  50. def auto_reply_enabled?
  51. profile_tags.where(name: %w[automatic_reply automatic\ reply auto_reply auto\ reply]).exists?
  52. end
  53. def record_event!(kind:, external_id:, occurred_at: nil, metadata: {})
  54. eid = external_id.to_s.strip
  55. raise ArgumentError, "external_id is required for profile events" if eid.blank?
  56. event = instagram_profile_events.find_or_initialize_by(kind: kind.to_s, external_id: eid)
  57. event.detected_at = Time.current
  58. event.occurred_at = occurred_at if occurred_at.present?
  59. event.metadata = (event.metadata || {}).merge(metadata.to_h)
  60. event.save!
  61. event
  62. end
  63. def latest_analysis
  64. ai_analyses.where(purpose: "profile").recent_first.first ||
  65. instagram_profile_analyses.recent_first.first
  66. end
  # Joins the content of the most recent history chunks into one string,
  # separated by newlines, with blank chunks dropped.
  #
  # @param max_chunks [#to_i] number of chunks to load, clamped to 1..12
  # @return [String] chunk contents in reverse of the `recent_first` order
  def history_narrative_text(max_chunks: 3)
    chunk_limit = max_chunks.to_i.clamp(1, 12)
    newest_first = instagram_profile_history_chunks.recent_first.limit(chunk_limit).to_a

    newest_first
      .reverse
      .filter_map { |chunk| chunk.content.to_s.strip.presence }
      .join("\n")
  end
  # Serializes the most recent history chunks as plain hashes, in
  # `recent_first` order.
  #
  # @param max_chunks [#to_i] number of chunks to load, clamped to 1..24
  # @return [Array<Hash>] one hash per chunk; timestamps are ISO 8601 or nil
  def history_narrative_chunks(max_chunks: 6)
    chunk_limit = max_chunks.to_i.clamp(1, 24)
    recent_chunks = instagram_profile_history_chunks.recent_first.limit(chunk_limit)

    recent_chunks.map do |chunk|
      {
        sequence: chunk.sequence,
        starts_at: chunk.starts_at&.iso8601,
        ends_at: chunk.ends_at&.iso8601,
        word_count: chunk.word_count,
        entry_count: chunk.entry_count,
        content: chunk.content.to_s
      }
    end
  end
  83. private
  # after_commit hook: asks Ops::LiveUpdateBroadcaster to publish a
  # "profiles_table_changed" update for this profile's account.
  #
  # The broadcast is best-effort. The record is already committed when this
  # runs, so a broadcaster failure is swallowed instead of being raised to the
  # code that saved the profile.
  #
  # @return the broadcaster's return value, or nil when broadcasting raised
  def broadcast_profiles_table_refresh
    Ops::LiveUpdateBroadcaster.broadcast!(
      topic: "profiles_table_changed",
      account_id: instagram_account_id,
      payload: { profile_id: id },
      throttle_key: "profiles_table_changed"
    )
  rescue StandardError
    nil
  end
  92. end

app/models/instagram_profile_action_log.rb

0.0% lines covered

100.0% branches covered

62 relevant lines. 0 lines covered and 62 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Log row for one action run against an Instagram profile. Tracks a status
  # from STATUSES plus timestamps, merged metadata and an encrypted log text.
  class InstagramProfileActionLog < ApplicationRecord
    # Action names accepted by the `action` inclusion validation.
    ACTIONS = [
      "fetch_profile_details",
      "verify_messageability",
      "analyze_profile",
      "analyze_profile_posts",
      "capture_profile_posts",
      "build_history",
      "sync_avatar",
      "sync_stories",
      "sync_stories_debug",
      "auto_story_reply",
      "post_comment"
    ].freeze

    # Status values accepted by the `status` inclusion validation.
    STATUSES = [ "queued", "running", "succeeded", "failed" ].freeze

    belongs_to :instagram_account
    belongs_to :instagram_profile

    encrypts :log_text

    after_commit :broadcast_account_audit_logs_refresh

    validates :action, presence: true, inclusion: { in: ACTIONS }
    validates :status, presence: true, inclusion: { in: STATUSES }
    validates :occurred_at, presence: true

    scope :recent_first, -> { order(occurred_at: :desc, id: :desc) }

    # Moves the log to "running" and clears any previous error message.
    # `started_at` is kept when already set, otherwise stamped with now.
    #
    # @param extra_metadata [#to_h, nil] merged over the existing metadata
    def mark_running!(extra_metadata: nil)
      running_attributes = {
        status: "running",
        started_at: started_at || Time.current,
        metadata: merge_metadata(extra_metadata),
        error_message: nil
      }
      update!(running_attributes)
    end

    # Moves the log to "succeeded", stamps `finished_at` and clears the error.
    #
    # @param extra_metadata [#to_h, nil] merged over the existing metadata
    # @param log_text [String, nil] replaces the stored text only when present
    def mark_succeeded!(extra_metadata: nil, log_text: nil)
      success_attributes = {
        status: "succeeded",
        finished_at: Time.current,
        metadata: merge_metadata(extra_metadata),
        log_text: log_text.presence || self.log_text,
        error_message: nil
      }
      update!(success_attributes)
    end

    # Moves the log to "failed", stamps `finished_at` and stores the message.
    #
    # @param error_message [#to_s] stored stringified
    # @param extra_metadata [#to_h, nil] merged over the existing metadata
    def mark_failed!(error_message:, extra_metadata: nil)
      failure_attributes = {
        status: "failed",
        finished_at: Time.current,
        metadata: merge_metadata(extra_metadata),
        error_message: error_message.to_s
      }
      update!(failure_attributes)
    end

    private

    # after_commit hook: enqueues an audit-log refresh for the owning account.
    # Any StandardError raised while doing so is swallowed (returns nil).
    def broadcast_account_audit_logs_refresh
      account = instagram_account
      RefreshAccountAuditLogsJob.enqueue_for(instagram_account_id: account.id, limit: 120) if account
    rescue StandardError
      nil
    end

    # Returns the current metadata (or {} when it is not a Hash), merged with
    # `extra.to_h` when `extra` is not blank.
    def merge_metadata(extra)
      current = metadata.is_a?(Hash) ? metadata : {}
      extra.blank? ? current : current.merge(extra.to_h)
    end
  end

app/models/instagram_profile_analysis.rb

0.0% lines covered

100.0% branches covered

8 relevant lines. 0 lines covered and 8 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Stored analysis for an Instagram profile: a provider, a status and the
  # encrypted prompt/response pair.
  class InstagramProfileAnalysis < ApplicationRecord
    belongs_to :instagram_profile

    # These contain potentially sensitive derived notes; keep them encrypted at rest.
    encrypts :prompt
    encrypts :response_text

    validates :provider, :status, presence: true

    # Newest rows first; `id` breaks ties between equal `created_at` values.
    scope :recent_first, -> { order(created_at: :desc, id: :desc) }
  end

app/models/instagram_profile_behavior_profile.rb

0.0% lines covered

100.0% branches covered

3 relevant lines. 0 lines covered and 3 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Record that belongs to exactly one InstagramProfile; declares no behavior
  # of its own beyond that association.
  class InstagramProfileBehaviorProfile < ApplicationRecord
    belongs_to :instagram_profile
  end

app/models/instagram_profile_event.rb

0.0% lines covered

100.0% branches covered

1485 relevant lines. 0 lines covered and 1485 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "digest"
  2. class InstagramProfileEvent < ApplicationRecord
  # Raised when comment generation cannot proceed. Carries an optional
  # machine-readable `reason` and `source` next to the message.
  class LocalStoryIntelligenceUnavailableError < StandardError
    attr_reader :reason, :source

    # @param message [String, nil] defaults to a generic message when nil
    # @param reason [#to_s, nil] stored stringified; blank becomes nil
    # @param source [#to_s, nil] stored stringified; blank becomes nil
    def initialize(message = nil, reason: nil, source: nil)
      @reason, @source = [ reason, source ].map { |value| value.to_s.presence }
      super(message || "Local story intelligence unavailable")
    end
  end
  11. belongs_to :instagram_profile
  12. has_one_attached :media
  13. has_one_attached :preview_image
  # Stories point back at this event through `source_event_id`; destroying the
  # event nullifies that reference instead of deleting the stories.
  14. has_many :instagram_stories, foreign_key: :source_event_id, dependent: :nullify
  15. validates :kind, presence: true
  16. validates :external_id, presence: true
  17. validates :detected_at, presence: true
  18. # LLM Comment validations
  19. validates :llm_comment_provider, inclusion: { in: %w[ollama local], allow_nil: true }
  20. validates :llm_comment_status, inclusion: { in: %w[not_requested queued running completed failed skipped], allow_nil: true }
  # Cross-field LLM comment checks run on update only, not on create.
  21. validate :llm_comment_consistency, on: :update
  22. after_commit :broadcast_account_audit_logs_refresh
  23. after_commit :broadcast_story_archive_refresh, on: %i[create update]
  24. after_commit :append_profile_history_narrative, on: :create
  25. after_commit :broadcast_profile_events_refresh
  # Event kinds treated as story-archive items (checked by `story_archive_item?`
  # and by the story archive refresh broadcast).
  26. STORY_ARCHIVE_EVENT_KINDS = %w[
  27. story_downloaded
  28. story_image_downloaded_via_feed
  29. story_media_downloaded_via_feed
  30. ].freeze
  # Generator result statuses that `generate_llm_comment!` accepts as success.
  31. LLM_SUCCESS_STATUSES = %w[ok].freeze
  # True when `llm_generated_comment` is not blank.
  def has_llm_generated_comment?
    !llm_generated_comment.blank?
  end
  # True while the LLM comment status is "queued" or "running".
  def llm_comment_in_progress?
    current_status = llm_comment_status.to_s
    current_status == "queued" || current_status == "running"
  end
  # Marks comment generation as "queued", clears the last error and announces
  # the queued state on the account's generation channel.
  #
  # @param job_id [#to_s, nil] stored when present; otherwise the existing
  #   `llm_comment_job_id` is kept
  def queue_llm_comment_generation!(job_id: nil)
    effective_job_id = job_id.to_s.presence || llm_comment_job_id
    update!(
      llm_comment_status: "queued",
      llm_comment_job_id: effective_job_id,
      llm_comment_last_error: nil
    )

    broadcast_llm_comment_generation_queued(job_id: job_id)
  end
  # Marks comment generation as "running", increments the attempt counter,
  # clears the last error and broadcasts the start message.
  #
  # @param job_id [#to_s, nil] stored when present; otherwise the existing
  #   `llm_comment_job_id` is kept
  def mark_llm_comment_running!(job_id: nil)
    effective_job_id = job_id.to_s.presence || llm_comment_job_id
    next_attempt = llm_comment_attempts.to_i + 1
    update!(
      llm_comment_status: "running",
      llm_comment_job_id: effective_job_id,
      llm_comment_attempts: next_attempt,
      llm_comment_last_error: nil
    )

    broadcast_llm_comment_generation_start
  end
  # Records a failed generation: status "failed", the error message, and a
  # "last_failure" entry merged into `llm_comment_metadata`; then broadcasts
  # the error. Any StandardError raised along the way is swallowed.
  #
  # @param error [Exception] its class name and message are recorded
  # @return the broadcast result, or nil when anything here raised
  def mark_llm_comment_failed!(error:)
    existing_metadata = llm_comment_metadata.is_a?(Hash) ? llm_comment_metadata : {}
    last_failure = {
      "error_class" => error.class.name,
      "error_message" => error.message.to_s,
      "failed_at" => Time.current.iso8601
    }

    update!(
      llm_comment_status: "failed",
      llm_comment_last_error: error.message.to_s,
      llm_comment_metadata: existing_metadata.merge("last_failure" => last_failure)
    )
    broadcast_llm_comment_generation_error(error.message)
  rescue StandardError
    nil
  end
  # Records a skipped generation: status "skipped", the message as last error,
  # and failure details merged into `llm_comment_metadata`; then broadcasts the
  # skip. Any StandardError raised along the way is swallowed.
  #
  # @param message [#to_s] explanation stored and broadcast
  # @param reason [#to_s, nil] optional reason code (omitted when blank)
  # @param source [#to_s, nil] optional origin; "validated_story_policy" is
  #   recorded as "policy_blocked", anything else as "unavailable"
  # @return the broadcast result, or nil when anything here raised
  def mark_llm_comment_skipped!(message:, reason: nil, source: nil)
    intel_status = source.to_s == "validated_story_policy" ? "policy_blocked" : "unavailable"

    failure_details = {
      "error_class" => "LocalStoryIntelligenceUnavailableError",
      "error_message" => message.to_s,
      "failed_at" => Time.current.iso8601,
      "reason" => reason.to_s.presence,
      "source" => source.to_s.presence
    }.compact

    existing_metadata = llm_comment_metadata.is_a?(Hash) ? llm_comment_metadata : {}
    merged_metadata = existing_metadata.merge(
      "last_failure" => failure_details,
      "local_story_intelligence_status" => intel_status
    )

    update!(
      llm_comment_status: "skipped",
      llm_comment_last_error: message.to_s,
      llm_comment_metadata: merged_metadata
    )
    broadcast_llm_comment_generation_skipped(message: message.to_s, reason: reason, source: source)
  rescue StandardError
    nil
  end
  # Generates, ranks and persists an LLM comment suggestion for this event.
  #
  # When a comment is already stored, only the status columns are touched (via
  # `update_columns`, so without validations or callbacks) and an
  # "already_completed" hash is returned.
  #
  # Otherwise the method builds the comment context, persists the validated
  # story insights and local story intelligence, runs the local generator,
  # ranks the suggestions, stores the best one and broadcasts progress.
  #
  # @param provider [#to_s] stored stringified in `llm_comment_provider`
  # @param model [String, nil] passed to Ai::LocalEngagementCommentGenerator
  # @return [Hash] the generator result merged with :technical_details,
  #   :selected_comment, :relevance_score and :ranked_candidates; or the
  #   "already_completed" hash
  # @raise [LocalStoryIntelligenceUnavailableError] when local story
  #   intelligence is blank, or when the generation policy does not allow a
  #   comment (source "validated_story_policy")
  # @raise [RuntimeError] when the generator status is not in
  #   LLM_SUCCESS_STATUSES, or when ranking yields no usable comment
  def generate_llm_comment!(provider: :local, model: nil)
    if has_llm_generated_comment?
      update_columns(
        llm_comment_status: "completed",
        llm_comment_last_error: nil,
        updated_at: Time.current
      )
      return {
        status: "already_completed",
        selected_comment: llm_generated_comment,
        relevance_score: llm_comment_relevance_score
      }
    end

    # Monotonic clock: the elapsed time is unaffected by wall-clock changes.
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    context = build_comment_context
    local_intel = context[:local_story_intelligence].is_a?(Hash) ? context[:local_story_intelligence] : {}
    validated_story_insights = context[:validated_story_insights].is_a?(Hash) ? context[:validated_story_insights] : {}
    generation_policy = validated_story_insights[:generation_policy].is_a?(Hash) ? validated_story_insights[:generation_policy] : {}

    # Persisted before the guards below, so the inputs are stored even when
    # generation is then refused.
    persist_validated_story_insights!(validated_story_insights)
    persist_local_story_intelligence!(local_intel)

    if local_story_intelligence_blank?(local_intel)
      reason = local_intel[:reason].to_s.presence || "local_story_intelligence_blank"
      source = local_intel[:source].to_s.presence || "unknown"
      raise LocalStoryIntelligenceUnavailableError.new(
        "Local story intelligence unavailable (reason: #{reason}, source: #{source}).",
        reason: reason,
        source: source
      )
    end

    unless ActiveModel::Type::Boolean.new.cast(generation_policy[:allow_comment])
      policy_reason_code = generation_policy[:reason_code].to_s.presence || "policy_blocked"
      policy_reason = generation_policy[:reason].to_s.presence || "Comment generation blocked by verified story policy."
      raise LocalStoryIntelligenceUnavailableError.new(
        policy_reason,
        reason: policy_reason_code,
        source: "validated_story_policy"
      )
    end

    broadcast_llm_comment_generation_progress(stage: "context_ready", message: "Context prepared from local story intelligence.", progress: 20)
    technical_details = capture_technical_details(context)
    broadcast_llm_comment_generation_progress(stage: "model_running", message: "Generating suggestions with local model.", progress: 55)

    generator = Ai::LocalEngagementCommentGenerator.new(
      ollama_client: Ai::OllamaClient.new,
      model: model
    )
    result = generator.generate!(
      post_payload: context[:post_payload],
      image_description: context[:image_description],
      topics: context[:topics],
      author_type: context[:author_type],
      historical_comments: context[:historical_comments],
      historical_context: context[:historical_context],
      historical_story_context: context[:historical_story_context],
      local_story_intelligence: context[:local_story_intelligence],
      historical_comparison: context[:historical_comparison],
      cv_ocr_evidence: context[:cv_ocr_evidence],
      verified_story_facts: context[:verified_story_facts],
      story_ownership_classification: context[:story_ownership_classification],
      generation_policy: context[:generation_policy],
      profile_preparation: context[:profile_preparation],
      verified_profile_history: context[:verified_profile_history],
      conversational_voice: context[:conversational_voice]
    )

    unless LLM_SUCCESS_STATUSES.include?(result[:status].to_s)
      raise "Local pipeline did not produce valid model suggestions (fallback blocked): #{result[:error_message]}"
    end

    ranked = Ai::CommentRelevanceScorer.rank(
      suggestions: result[:comment_suggestions],
      image_description: context[:image_description],
      topics: context[:topics],
      historical_comments: context[:historical_comments]
    )
    # Each ranked entry is destructured as (comment text, score).
    selected_comment, score = ranked.first
    raise "No valid comment suggestions generated" if selected_comment.to_s.blank?

    duration_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000.0).round
    top_candidates = ranked.first(8)

    update!(
      llm_generated_comment: selected_comment,
      llm_comment_generated_at: Time.current,
      llm_comment_model: result[:model],
      llm_comment_provider: provider.to_s,
      llm_comment_status: "completed",
      llm_comment_relevance_score: score,
      llm_comment_last_error: nil,
      llm_comment_metadata: (llm_comment_metadata.is_a?(Hash) ? llm_comment_metadata : {}).merge(
        "prompt" => result[:prompt],
        "source" => result[:source],
        "fallback_used" => ActiveModel::Type::Boolean.new.cast(result[:fallback_used]),
        "generation_status" => result[:status],
        "technical_details" => technical_details,
        "local_story_intelligence" => context[:local_story_intelligence],
        "historical_story_context_used" => Array(context[:historical_story_context]).first(12),
        "historical_comparison" => context[:historical_comparison],
        "cv_ocr_evidence" => context[:cv_ocr_evidence],
        "verified_story_facts" => context[:verified_story_facts],
        "ownership_classification" => context[:story_ownership_classification],
        "generation_policy" => context[:generation_policy],
        "validated_story_insights" => context[:validated_story_insights],
        "ranked_candidates" => top_candidates.map { |text, value| { "comment" => text, "score" => value } },
        "selected_comment" => selected_comment,
        "selected_relevance_score" => score,
        "generated_at" => Time.current.iso8601,
        "processing_ms" => duration_ms,
        "pipeline" => "validated_story_intelligence_v3"
      )
    )

    broadcast_llm_comment_generation_progress(stage: "completed", message: "Comment ready.", progress: 100)
    broadcast_story_archive_refresh

    # Built once and used both for the broadcast and as the return value.
    final_result = result.merge(
      technical_details: technical_details,
      selected_comment: selected_comment,
      relevance_score: score,
      ranked_candidates: top_candidates
    )
    broadcast_llm_comment_generation_update(final_result)
    final_result
  end
  # The "reply_comment" entry of `metadata`, or nil when metadata is not a Hash.
  def reply_comment
    return unless metadata.is_a?(Hash)

    metadata["reply_comment"]
  end
  # True when this event's kind is one of STORY_ARCHIVE_EVENT_KINDS.
  def story_archive_item?
    event_kind = kind.to_s
    STORY_ARCHIVE_EVENT_KINDS.include?(event_kind)
  end
  # Assembles a diagnostic snapshot of everything that fed comment generation:
  # event/media facts, story analysis inputs, profile data and the prompt
  # inputs taken from `context`.
  #
  # @param context [Hash] the hash produced by `build_comment_context`;
  #   entries expected to be hashes fall back to {} when they are not
  # @return [Hash] nested hash with :timestamp, :event_id, :story_id,
  #   :timeline, :media_info, :local_story_intelligence, :analysis,
  #   :profile_analysis and :prompt_engineering
  def capture_technical_details(context)
    hash_at = ->(key) { context[key].is_a?(Hash) ? context[key] : {} }

    profile = instagram_profile
    media_blob = (media.blob if media.attached?)
    timeline = story_timeline_data

    local_intelligence = hash_at.call(:local_story_intelligence)
    verified_story_facts = hash_at.call(:verified_story_facts)
    story_ownership_classification = hash_at.call(:story_ownership_classification)
    generation_policy = hash_at.call(:generation_policy)
    validated_story_insights = hash_at.call(:validated_story_insights)
    profile_preparation = hash_at.call(:profile_preparation)
    verified_profile_history = Array(context[:verified_profile_history]).first(12)
    conversational_voice = hash_at.call(:conversational_voice)
    event_metadata = metadata.is_a?(Hash) ? metadata : nil

    captured_at = Time.current.iso8601

    media_info =
      if media_blob
        {
          content_type: media_blob.content_type,
          size_bytes: media_blob.byte_size,
          dimensions: event_metadata ? event_metadata.slice("media_width", "media_height") : {},
          url: Rails.application.routes.url_helpers.rails_blob_path(media, only_path: true)
        }
      else
        {}
      end

    # Presence flags and counts derived purely from the verified story facts.
    extraction_summary = {
      has_ocr_text: verified_story_facts[:ocr_text].to_s.present?,
      has_transcript: verified_story_facts[:transcript].to_s.present?,
      objects_count: Array(verified_story_facts[:objects]).size,
      object_detections_count: Array(verified_story_facts[:object_detections]).size,
      scenes_count: Array(verified_story_facts[:scenes]).size,
      hashtags_count: Array(verified_story_facts[:hashtags]).size,
      mentions_count: Array(verified_story_facts[:mentions]).size,
      detected_usernames_count: Array(verified_story_facts[:detected_usernames]).size,
      faces_count: verified_story_facts[:face_count].to_i,
      signal_score: verified_story_facts[:signal_score].to_i,
      source: verified_story_facts[:source].to_s,
      reason: verified_story_facts[:reason].to_s.presence
    }

    analysis = {
      verified_story_facts: verified_story_facts,
      ownership_classification: story_ownership_classification,
      generation_policy: generation_policy,
      validated_story_insights: validated_story_insights,
      cv_ocr_evidence: context[:cv_ocr_evidence],
      historical_comparison: context[:historical_comparison],
      extraction_summary: extraction_summary
    }

    profile_analysis = {
      username: profile&.username,
      display_name: profile&.display_name,
      bio: profile&.bio,
      bio_length: profile&.bio&.length || 0,
      detected_author_type: determine_author_type(profile),
      extracted_topics: extract_topics_from_profile(profile),
      profile_comment_preparation: profile_preparation,
      conversational_voice: conversational_voice,
      verified_profile_history: verified_profile_history
    }

    prompt_engineering = {
      final_prompt: context[:post_payload],
      image_description: context[:image_description],
      topics_used: context[:topics],
      author_classification: context[:author_type],
      historical_context: context[:historical_context],
      historical_story_context: Array(context[:historical_story_context]).first(10),
      historical_comparison: context[:historical_comparison],
      verified_story_facts: verified_story_facts,
      ownership_classification: story_ownership_classification,
      generation_policy: generation_policy,
      cv_ocr_evidence: context[:cv_ocr_evidence],
      profile_comment_preparation: profile_preparation,
      conversational_voice: conversational_voice,
      verified_profile_history: verified_profile_history,
      rules_applied: context[:post_payload]&.dig(:rules)
    }

    {
      timestamp: captured_at,
      event_id: id,
      story_id: event_metadata && event_metadata["story_id"],
      timeline: timeline,
      media_info: media_info,
      local_story_intelligence: local_intelligence,
      analysis: analysis,
      profile_analysis: profile_analysis,
      prompt_engineering: prompt_engineering
    }
  end
  # Broadcasts a "queued" message for this event on the owning account's
  # "llm_comment_generation_<account id>" stream. No-op without an account;
  # any StandardError is swallowed.
  #
  # @param job_id [#to_s, nil] falls back to `llm_comment_job_id` when blank
  def broadcast_llm_comment_generation_queued(job_id: nil)
    account = instagram_profile&.instagram_account
    if account
      stream_name = "llm_comment_generation_#{account.id}"
      ActionCable.server.broadcast(
        stream_name,
        {
          event_id: id,
          status: "queued",
          job_id: job_id.to_s.presence || llm_comment_job_id,
          message: "Comment generation queued",
          estimated_seconds: estimated_generation_seconds(queue_state: true),
          progress: 5
        }
      )
    end
  rescue StandardError
    nil
  end
  # Broadcasts a "completed" message carrying the stored comment fields and
  # the given generation result. No-op without an account; any StandardError
  # is swallowed.
  #
  # @param generation_result [Hash] forwarded as-is under :generation_result
  def broadcast_llm_comment_generation_update(generation_result)
    account = instagram_profile&.instagram_account
    if account
      stream_name = "llm_comment_generation_#{account.id}"
      ActionCable.server.broadcast(
        stream_name,
        {
          event_id: id,
          status: "completed",
          comment: llm_generated_comment,
          generated_at: llm_comment_generated_at,
          model: llm_comment_model,
          provider: llm_comment_provider,
          relevance_score: llm_comment_relevance_score,
          generation_result: generation_result
        }
      )
    end
  rescue StandardError
    nil
  end
  # Broadcasts a "started" message (progress 12) for this event. No-op
  # without an account; any StandardError is swallowed.
  def broadcast_llm_comment_generation_start
    account = instagram_profile&.instagram_account
    if account
      stream_name = "llm_comment_generation_#{account.id}"
      ActionCable.server.broadcast(
        stream_name,
        {
          event_id: id,
          status: "started",
          message: "Generating comment...",
          estimated_seconds: estimated_generation_seconds(queue_state: false),
          progress: 12
        }
      )
    end
  rescue StandardError
    nil
  end
  # Broadcasts an "error" message for this event. No-op without an account;
  # any StandardError is swallowed.
  #
  # @param error_message [String] sent under :error
  def broadcast_llm_comment_generation_error(error_message)
    account = instagram_profile&.instagram_account
    if account
      stream_name = "llm_comment_generation_#{account.id}"
      ActionCable.server.broadcast(
        stream_name,
        {
          event_id: id,
          status: "error",
          error: error_message,
          message: "Failed to generate comment"
        }
      )
    end
  rescue StandardError
    nil
  end
  # Broadcasts a "skipped" message for this event; blank reason/source are
  # left out of the payload. No-op without an account; any StandardError is
  # swallowed.
  #
  # @param message [#to_s] falls back to "Comment generation skipped" when blank
  # @param reason [#to_s, nil]
  # @param source [#to_s, nil]
  def broadcast_llm_comment_generation_skipped(message:, reason: nil, source: nil)
    account = instagram_profile&.instagram_account
    if account
      stream_name = "llm_comment_generation_#{account.id}"
      skipped_payload = {
        event_id: id,
        status: "skipped",
        message: message.to_s.presence || "Comment generation skipped",
        reason: reason.to_s.presence,
        source: source.to_s.presence
      }.compact
      ActionCable.server.broadcast(stream_name, skipped_payload)
    end
  rescue StandardError
    nil
  end
  # Broadcasts a "running" progress message for this event. No-op without an
  # account; any StandardError is swallowed.
  #
  # @param stage [#to_s] stage label
  # @param message [#to_s] text for the stage
  # @param progress [#to_i] percentage, clamped to 0..100
  def broadcast_llm_comment_generation_progress(stage:, message:, progress:)
    account = instagram_profile&.instagram_account
    if account
      stream_name = "llm_comment_generation_#{account.id}"
      ActionCable.server.broadcast(
        stream_name,
        {
          event_id: id,
          status: "running",
          stage: stage.to_s,
          message: message.to_s,
          progress: progress.to_i.clamp(0, 100),
          estimated_seconds: estimated_generation_seconds(queue_state: false)
        }
      )
    end
  rescue StandardError
    nil
  end
  # Replaces the "story_media_archive_refresh_signal" Turbo Stream target on
  # the account's story-archive stream with a freshly rendered signal partial.
  # No-op when `account` is falsy; any StandardError is swallowed.
  #
  # @param account the account whose stream receives the broadcast
  def self.broadcast_story_archive_refresh!(account:)
    if account
      Turbo::StreamsChannel.broadcast_replace_to(
        [ account, :story_archive ],
        target: "story_media_archive_refresh_signal",
        partial: "instagram_accounts/story_archive_refresh_signal",
        locals: { refreshed_at: Time.current }
      )
    end
  rescue StandardError
    nil
  end
  418. private
  # after_commit hook: enqueues an audit-log refresh for the account that
  # owns this event's profile. No-op without an account; any StandardError is
  # swallowed.
  def broadcast_account_audit_logs_refresh
    account = instagram_profile&.instagram_account
    RefreshAccountAuditLogsJob.enqueue_for(instagram_account_id: account.id, limit: 120) if account
  rescue StandardError
    nil
  end
  # after_commit (create) hook: enqueues AppendProfileHistoryNarrativeJob for
  # this event in "event" mode. Any StandardError is swallowed.
  def append_profile_history_narrative
    job_arguments = { instagram_profile_event_id: id, mode: "event" }
    AppendProfileHistoryNarrativeJob.perform_later(**job_arguments)
  rescue StandardError
    nil
  end
  # Triggers the class-level story archive refresh for the owning account,
  # but only for events whose kind is in STORY_ARCHIVE_EVENT_KINDS. Any
  # StandardError is swallowed.
  def broadcast_story_archive_refresh
    if STORY_ARCHIVE_EVENT_KINDS.include?(kind.to_s)
      owning_account = instagram_profile&.instagram_account
      self.class.broadcast_story_archive_refresh!(account: owning_account)
    end
  rescue StandardError
    nil
  end
  # after_commit hook: publishes a "profile_events_changed" live update for
  # the profile's account, throttled per profile. No-op when the profile has
  # no account id; any StandardError is swallowed.
  def broadcast_profile_events_refresh
    account_id = instagram_profile&.instagram_account_id
    if account_id
      Ops::LiveUpdateBroadcaster.broadcast!(
        topic: "profile_events_changed",
        account_id: account_id,
        payload: { profile_id: instagram_profile_id, event_id: id },
        throttle_key: "profile_events_changed:#{instagram_profile_id}"
      )
    end
  rescue StandardError
    nil
  end
  # Custom validation keeping the LLM comment columns coherent:
  # a "completed" status requires comment, generated_at and provider, and a
  # generated_at timestamp requires a comment.
  def llm_comment_consistency
    if llm_comment_status.to_s == "completed"
      if llm_generated_comment.blank?
        errors.add(:llm_generated_comment, "must be present when status is completed")
      end
      if llm_comment_generated_at.blank?
        errors.add(:llm_comment_generated_at, "must be present when status is completed")
      end
      if llm_comment_provider.blank?
        errors.add(:llm_comment_provider, "must be present when status is completed")
      end
    end

    if llm_generated_comment.blank? && llm_comment_generated_at.present?
      errors.add(:llm_generated_comment, "must be present when generated_at is set")
    end
  end
  # Collects every input the comment generator needs for this event: the post
  # payload (with generation rules), story facts, historical context, profile
  # preparation and the conversational voice profile.
  #
  # `verified_story_facts` is read from the insights before historical
  # validation is applied; ownership classification and generation policy are
  # read afterwards.
  #
  # @return [Hash] symbol-keyed context hash (see the final literal for keys)
  def build_comment_context
    profile = instagram_profile
    raw_metadata = metadata.is_a?(Hash) ? metadata : {}
    local_story_intelligence = local_story_intelligence_payload

    insight_builder = Ai::VerifiedStoryInsightBuilder.new(
      profile: profile,
      local_story_intelligence: local_story_intelligence,
      metadata: raw_metadata
    )
    validated_story_insights = insight_builder.build

    verified_story_facts = validated_story_insights[:verified_story_facts]
    verified_story_facts = {} unless verified_story_facts.is_a?(Hash)

    post_payload = {
      post: {
        event_id: id,
        media_type: raw_metadata["media_type"].to_s.presence || media&.blob&.content_type.to_s.presence || "unknown"
      },
      author_profile: {
        username: profile&.username,
        display_name: profile&.display_name,
        bio_keywords: extract_topics_from_profile(profile).first(10)
      },
      rules: {
        max_length: 140,
        require_local_pipeline: true,
        require_verified_story_facts: true,
        block_unverified_generation: true,
        verified_only: true
      }
    }

    # Verified facts win over the raw local intelligence whenever present.
    story_signal = verified_story_facts.presence || local_story_intelligence

    image_description = build_story_image_description(local_story_intelligence: story_signal)
    historical_comments = recent_llm_comments_for_profile(profile)

    combined_topics = Array(verified_story_facts[:topics]) + extract_topics_from_profile(profile)
    topics = combined_topics.map(&:to_s).reject(&:blank?).uniq.first(20)

    historical_story_context = recent_story_intelligence_context(profile)
    profile_preparation = latest_profile_comment_preparation(profile)
    verified_profile_history = recent_analyzed_profile_history(profile)
    conversational_voice = build_conversational_voice_profile(
      profile: profile,
      historical_story_context: historical_story_context,
      verified_profile_history: verified_profile_history,
      profile_preparation: profile_preparation
    )
    historical_comparison = build_historical_comparison(
      current: story_signal,
      historical_story_context: historical_story_context
    )
    validated_story_insights = apply_historical_validation(
      validated_story_insights: validated_story_insights,
      historical_comparison: historical_comparison
    )

    story_ownership_classification = validated_story_insights[:ownership_classification]
    story_ownership_classification = {} unless story_ownership_classification.is_a?(Hash)
    generation_policy = validated_story_insights[:generation_policy]
    generation_policy = {} unless generation_policy.is_a?(Hash)

    cv_ocr_evidence = build_cv_ocr_evidence(local_story_intelligence: story_signal)

    post_payload.merge!(
      historical_comparison: historical_comparison,
      cv_ocr_evidence: cv_ocr_evidence,
      story_ownership_classification: story_ownership_classification,
      generation_policy: generation_policy,
      profile_comment_preparation: profile_preparation,
      conversational_voice: conversational_voice,
      verified_profile_history: verified_profile_history
    )

    historical_context = build_compact_historical_context(
      profile: profile,
      historical_story_context: historical_story_context,
      verified_profile_history: verified_profile_history,
      profile_preparation: profile_preparation
    )

    {
      post_payload: post_payload,
      image_description: image_description,
      topics: topics,
      author_type: determine_author_type(profile),
      historical_comments: historical_comments,
      historical_context: historical_context,
      historical_story_context: historical_story_context,
      historical_comparison: historical_comparison,
      cv_ocr_evidence: cv_ocr_evidence,
      local_story_intelligence: local_story_intelligence,
      verified_story_facts: verified_story_facts,
      story_ownership_classification: story_ownership_classification,
      generation_policy: generation_policy,
      validated_story_insights: validated_story_insights,
      profile_preparation: profile_preparation,
      verified_profile_history: verified_profile_history,
      conversational_voice: conversational_voice
    }
  end
  552. def local_story_intelligence_payload
  553. raw = metadata.is_a?(Hash) ? metadata : {}
  554. story = instagram_stories.order(updated_at: :desc, id: :desc).first
  555. story_meta = story&.metadata.is_a?(Hash) ? story.metadata : {}
  556. story_embedded = story_meta["content_understanding"].is_a?(Hash) ? story_meta["content_understanding"] : {}
  557. event_embedded = raw["local_story_intelligence"].is_a?(Hash) ? raw["local_story_intelligence"] : {}
  558. embedded = story_embedded.presence || event_embedded.presence || {}
  559. ocr_text = first_present(
  560. embedded["ocr_text"],
  561. event_embedded["ocr_text"],
  562. story_meta["ocr_text"],
  563. raw["ocr_text"]
  564. )
  565. transcript = first_present(
  566. embedded["transcript"],
  567. event_embedded["transcript"],
  568. story_meta["transcript"],
  569. raw["transcript"]
  570. )
  571. objects = merge_unique_values(
  572. embedded["objects"],
  573. event_embedded["objects"],
  574. story_meta["content_signals"],
  575. raw["content_signals"]
  576. )
  577. hashtags = merge_unique_values(
  578. embedded["hashtags"],
  579. event_embedded["hashtags"],
  580. story_meta["hashtags"],
  581. raw["hashtags"]
  582. )
  583. mentions = merge_unique_values(
  584. embedded["mentions"],
  585. event_embedded["mentions"],
  586. story_meta["mentions"],
  587. raw["mentions"]
  588. )
  589. profile_handles = merge_unique_values(
  590. embedded["profile_handles"],
  591. event_embedded["profile_handles"],
  592. story_meta["profile_handles"],
  593. raw["profile_handles"]
  594. )
  595. scenes = normalize_hash_array(
  596. embedded["scenes"],
  597. event_embedded["scenes"],
  598. story_meta["scenes"],
  599. raw["scenes"]
  600. )
  601. ocr_blocks = normalize_hash_array(
  602. embedded["ocr_blocks"],
  603. event_embedded["ocr_blocks"],
  604. story_meta["ocr_blocks"],
  605. raw["ocr_blocks"]
  606. )
  607. ocr_text_from_blocks = ocr_blocks
  608. .map { |row| row.is_a?(Hash) ? (row["text"] || row[:text]) : nil }
  609. .map(&:to_s)
  610. .map(&:strip)
  611. .reject(&:blank?)
  612. .uniq
  613. .join("\n")
  614. .presence
  615. ocr_text = first_present(ocr_text, ocr_text_from_blocks)
  616. if hashtags.empty? && ocr_text.to_s.present?
  617. hashtags = ocr_text.to_s.scan(/#[a-zA-Z0-9_]+/).map(&:downcase).uniq.first(20)
  618. end
  619. if mentions.empty? && ocr_text.to_s.present?
  620. mentions = ocr_text.to_s.scan(/@[a-zA-Z0-9._]+/).map(&:downcase).uniq.first(20)
  621. end
  622. if profile_handles.empty? && ocr_text.to_s.present?
  623. profile_handles = ocr_text.to_s.scan(/\b[a-zA-Z0-9._]{3,30}\b/)
  624. .map(&:downcase)
  625. .select { |token| token.include?("_") || token.include?(".") }
  626. .reject { |token| token.include?("instagram.com") }
  627. .uniq
  628. .first(30)
  629. end
  630. object_detections = normalize_hash_array(
  631. embedded["object_detections"],
  632. event_embedded["object_detections"],
  633. story_meta["object_detections"],
  634. raw["object_detections"]
  635. )
  636. detected_object_labels = object_detections
  637. .map { |row| row.is_a?(Hash) ? (row[:label] || row["label"] || row[:description] || row["description"]) : nil }
  638. .map(&:to_s)
  639. .map(&:strip)
  640. .reject(&:blank?)
  641. objects = merge_unique_values(objects, detected_object_labels)
  642. topics = merge_unique_values(
  643. embedded["topics"],
  644. objects,
  645. hashtags.map { |tag| tag.to_s.delete_prefix("#") }
  646. )
  647. normalized_people = normalize_people_rows(
  648. event_embedded["people"],
  649. raw["face_people"],
  650. raw["people"],
  651. story_meta["face_people"],
  652. story_meta["participants"],
  653. story_meta.dig("face_identity", "participants"),
  654. raw["participants"],
  655. raw.dig("face_identity", "participants")
  656. )
  657. computed_face_count = [
  658. (event_embedded["face_count"] || embedded["faces_count"] || raw["face_count"]).to_i,
  659. normalized_people.size
  660. ].max
  661. payload = {
  662. ocr_text: ocr_text.to_s.presence,
  663. transcript: transcript.to_s.presence,
  664. objects: objects,
  665. hashtags: hashtags,
  666. mentions: mentions,
  667. profile_handles: profile_handles,
  668. topics: topics,
  669. scenes: scenes.first(80),
  670. ocr_blocks: ocr_blocks.first(120),
  671. object_detections: normalize_object_detections(object_detections, limit: 120),
  672. face_count: computed_face_count,
  673. people: normalized_people.first(12),
  674. source_account_reference: extract_source_account_reference(raw: raw, story_meta: story_meta),
  675. source_profile_ids: extract_source_profile_ids_from_metadata(raw: raw, story_meta: story_meta),
  676. media_type: raw["media_type"].to_s.presence || story_meta["media_type"].to_s.presence || media&.blob&.content_type.to_s.presence,
  677. source: if story_embedded.present?
  678. "story_processing"
  679. elsif event_embedded.present?
  680. "event_local_pipeline"
  681. else
  682. "event_metadata"
  683. end
  684. }
  685. needs_structured_enrichment =
  686. media.attached? &&
  687. Array(payload[:object_detections]).empty? &&
  688. Array(payload[:ocr_blocks]).empty? &&
  689. Array(payload[:scenes]).empty?
  690. if needs_structured_enrichment
  691. extracted = extract_live_local_intelligence_from_event_media(story_id: raw["story_id"].to_s.presence || id.to_s)
  692. if extracted.is_a?(Hash)
  693. merged_scenes = normalize_hash_array(payload[:scenes], extracted[:scenes]).first(80)
  694. merged_ocr_blocks = normalize_hash_array(payload[:ocr_blocks], extracted[:ocr_blocks]).first(120)
  695. merged_object_detections = normalize_object_detections(payload[:object_detections], extracted[:object_detections], limit: 120)
  696. payload[:scenes] = merged_scenes
  697. payload[:ocr_blocks] = merged_ocr_blocks
  698. payload[:object_detections] = merged_object_detections
  699. if merged_scenes.any? || merged_ocr_blocks.any? || merged_object_detections.any?
  700. payload[:source] = "live_local_enrichment"
  701. end
  702. end
  703. end
  704. if local_story_intelligence_blank?(payload) && media.attached?
  705. extracted = extract_live_local_intelligence_from_event_media(story_id: raw["story_id"].to_s.presence || id.to_s)
  706. if extracted.is_a?(Hash)
  707. if !local_story_intelligence_blank?(extracted)
  708. payload = extracted
  709. elsif extracted[:reason].to_s.present?
  710. payload[:reason] = extracted[:reason].to_s
  711. end
  712. end
  713. end
  714. if local_story_intelligence_blank?(payload)
  715. payload[:source] = "unavailable"
  716. payload[:reason] = payload[:reason].to_s.presence || "local_ai_extraction_empty"
  717. end
  718. payload
  719. rescue StandardError
  720. {
  721. ocr_text: nil,
  722. transcript: nil,
  723. objects: [],
  724. hashtags: [],
  725. mentions: [],
  726. profile_handles: [],
  727. topics: [],
  728. scenes: [],
  729. ocr_blocks: [],
  730. object_detections: [],
  731. source: "unavailable"
  732. }
  733. end
  # Stores locally extracted story intelligence on this event's metadata and
  # enqueues a profile-history narrative append for it.
  #
  # Nothing is written when +payload+ is not a Hash or when its :source is
  # blank or "unavailable". The narrative job is skipped when
  # story_excluded_from_narrative? reports the story as excluded.
  #
  # @param payload [Hash] symbol-keyed intelligence (:source, :ocr_text,
  #   :transcript, :objects, :hashtags, :mentions, :profile_handles, :topics,
  #   :scenes, :ocr_blocks, :object_detections, :face_count, :people)
  # @return [Object, nil] nil on an early exit or when any StandardError is
  #   raised (best-effort persistence); otherwise the result of perform_later
  def persist_local_story_intelligence!(payload)
    return unless payload.is_a?(Hash)

    source = payload[:source].to_s
    return if source.blank? || source == "unavailable"

    current_meta = metadata.is_a?(Hash) ? metadata.deep_dup : {}

    # Flat copies of the signals; text fields are only overwritten when present.
    current_meta["ocr_text"] = payload[:ocr_text].to_s if payload[:ocr_text].present?
    current_meta["transcript"] = payload[:transcript].to_s if payload[:transcript].present?
    current_meta["content_signals"] = Array(payload[:objects]).map(&:to_s).reject(&:blank?).first(40)
    current_meta["hashtags"] = Array(payload[:hashtags]).map(&:to_s).reject(&:blank?).first(20)
    current_meta["mentions"] = Array(payload[:mentions]).map(&:to_s).reject(&:blank?).first(20)
    current_meta["profile_handles"] = Array(payload[:profile_handles]).map(&:to_s).reject(&:blank?).first(30)
    current_meta["topics"] = Array(payload[:topics]).map(&:to_s).reject(&:blank?).first(40)
    current_meta["scenes"] = normalize_hash_array(payload[:scenes]).first(80)
    current_meta["ocr_blocks"] = normalize_hash_array(payload[:ocr_blocks]).first(120)
    current_meta["object_detections"] = normalize_object_detections(payload[:object_detections], limit: 120)
    current_meta["face_count"] = payload[:face_count].to_i if payload[:face_count].to_i.positive?
    current_meta["face_people"] = Array(payload[:people]).first(12) if Array(payload[:people]).any?

    # Snapshot of the whole payload, replacing any previously stored snapshot.
    current_meta["local_story_intelligence"] = {
      "source" => source,
      "captured_at" => Time.current.iso8601,
      "ocr_text" => payload[:ocr_text].to_s.presence,
      "transcript" => payload[:transcript].to_s.presence,
      "objects" => Array(payload[:objects]).first(40),
      "hashtags" => Array(payload[:hashtags]).first(30),
      "mentions" => Array(payload[:mentions]).first(30),
      "profile_handles" => Array(payload[:profile_handles]).first(30),
      "topics" => Array(payload[:topics]).first(40),
      "scenes" => normalize_hash_array(payload[:scenes]).first(80),
      "ocr_blocks" => normalize_hash_array(payload[:ocr_blocks]).first(120),
      "object_detections" => normalize_object_detections(payload[:object_detections], limit: 120),
      "face_count" => payload[:face_count].to_i,
      "people" => Array(payload[:people]).first(12)
    }
    current_meta["local_story_intelligence_history_appended_at"] = Time.current.iso8601
    update_columns(metadata: current_meta, updated_at: Time.current)

    ownership = current_meta["story_ownership_classification"].is_a?(Hash) ? current_meta["story_ownership_classification"] : {}
    policy = current_meta["story_generation_policy"].is_a?(Hash) ? current_meta["story_generation_policy"] : {}
    return if story_excluded_from_narrative?(ownership: ownership, policy: policy)

    history_payload = payload.merge(description: build_story_image_description(local_story_intelligence: payload))
    AppendProfileHistoryNarrativeJob.perform_later(
      instagram_profile_event_id: id,
      mode: "story_intelligence",
      intelligence: history_payload
    )
  rescue StandardError
    # Deliberately best-effort: persistence failures must not break the caller.
    nil
  end
  # Writes validated story insights (verified facts, ownership classification
  # and generation policy) into this event's metadata, then enqueues a
  # profile-history narrative append unless the story is excluded.
  #
  # The write is skipped when all three sections are blank, or when the SHA-256
  # signature of the incoming insights matches the stored one.
  #
  # @param payload [Hash] expects :verified_story_facts,
  #   :ownership_classification and :generation_policy (each a Hash)
  # @return [Object, nil] nil on an early exit or on any StandardError
  def persist_validated_story_insights!(payload)
    return unless payload.is_a?(Hash)

    hash_or_empty = ->(value) { value.is_a?(Hash) ? value : {} }
    verified_story_facts = hash_or_empty.call(payload[:verified_story_facts])
    ownership_classification = hash_or_empty.call(payload[:ownership_classification])
    generation_policy = hash_or_empty.call(payload[:generation_policy])
    return if [verified_story_facts, ownership_classification, generation_policy].all?(&:blank?)

    signature = Digest::SHA256.hexdigest(
      {
        verified_story_facts: build_cv_ocr_evidence(local_story_intelligence: verified_story_facts),
        ownership_classification: ownership_classification,
        generation_policy: generation_policy
      }.to_json
    )

    current_meta = metadata.is_a?(Hash) ? metadata.deep_dup : {}
    stored = hash_or_empty.call(current_meta["validated_story_insights"])
    return if stored["signature"].to_s == signature

    # Looks a key up by symbol first, then by its string form.
    either = ->(hash, key) { hash[key] || hash[key.to_s] }

    ownership_label = either.call(ownership_classification, :label)
    share_status = either.call(ownership_classification, :share_status).to_s.presence || "unknown"
    source_profile_references = Array(
      either.call(ownership_classification, :source_profile_references) ||
        either.call(verified_story_facts, :source_profile_references)
    ).map(&:to_s).reject(&:blank?).first(20)
    source_profile_ids = Array(
      either.call(ownership_classification, :source_profile_ids) ||
        either.call(verified_story_facts, :source_profile_ids)
    ).map(&:to_s).reject(&:blank?).first(20)

    # key? is used so an explicit `false` under the symbol key is respected.
    allow_comment_key = generation_policy.key?(:allow_comment) ? :allow_comment : "allow_comment"
    allow_comment_value = generation_policy[allow_comment_key]

    excluded_from_narrative = story_excluded_from_narrative?(ownership: ownership_classification, policy: generation_policy)
    exclusion_reason = nil
    if excluded_from_narrative
      exclusion_reason = [
        ownership_classification[:summary],
        ownership_classification["summary"],
        generation_policy[:reason],
        generation_policy["reason"]
      ].filter_map { |candidate| candidate.to_s.presence }.first
    end

    current_meta["validated_story_insights"] = {
      "signature" => signature,
      "validated_at" => Time.current.iso8601,
      "verified_story_facts" => verified_story_facts,
      "ownership_classification" => ownership_classification,
      "generation_policy" => generation_policy
    }
    current_meta["story_ownership_classification"] = ownership_classification
    current_meta["story_generation_policy"] = generation_policy
    current_meta["detected_external_usernames"] =
      Array(either.call(ownership_classification, :detected_external_usernames)).map(&:to_s).first(12)
    current_meta["source_profile_references"] = source_profile_references
    current_meta["source_profile_ids"] = source_profile_ids
    current_meta["share_status"] = share_status
    current_meta["analysis_excluded"] = excluded_from_narrative
    current_meta["analysis_exclusion_reason"] = exclusion_reason
    current_meta["content_classification"] = {
      "share_status" => share_status,
      "ownership_label" => ownership_label,
      "allow_comment" => ActiveModel::Type::Boolean.new.cast(allow_comment_value),
      "source_profile_references" => source_profile_references,
      "source_profile_ids" => source_profile_ids
    }
    update_columns(metadata: current_meta, updated_at: Time.current)
    return if excluded_from_narrative

    history_payload = verified_story_facts.merge(
      ownership_classification: ownership_label,
      ownership_summary: either.call(ownership_classification, :summary),
      ownership_confidence: either.call(ownership_classification, :confidence),
      ownership_reason_codes: Array(either.call(ownership_classification, :reason_codes)).first(12),
      generation_policy: generation_policy,
      description: build_story_image_description(local_story_intelligence: verified_story_facts)
    )
    AppendProfileHistoryNarrativeJob.perform_later(
      instagram_profile_event_id: id,
      mode: "story_intelligence",
      intelligence: history_payload
    )
  rescue StandardError
    nil
  end
  # Builds a one-paragraph, human-readable description of a story from its
  # extracted intelligence.
  #
  # Keys may be symbols or strings (symbol wins when both are set), so facts
  # that arrive string-keyed still produce a real description instead of the
  # generic fallback sentence. A non-Hash argument is treated as empty.
  #
  # @param local_story_intelligence [Hash] may carry objects, object_detections,
  #   ocr_text, transcript, topics, scenes and face_count
  # @return [String] sentences joined by a space; a generic sentence when no
  #   signal is available
  def build_story_image_description(local_story_intelligence:)
    intel = local_story_intelligence.is_a?(Hash) ? local_story_intelligence : {}
    read = lambda do |key|
      value = intel[key]
      value.nil? ? intel[key.to_s] : value
    end

    signals = Array(read.call(:objects)).first(6)
    if signals.empty?
      # Fall back to the labels of raw detections when no object list exists.
      signals = Array(read.call(:object_detections))
        .map { |row| row.is_a?(Hash) ? (row[:label] || row["label"]) : nil }
        .map(&:to_s)
        .map(&:strip)
        .reject(&:blank?)
        .uniq
        .first(6)
    end

    ocr = read.call(:ocr_text).to_s.strip
    transcript = read.call(:transcript).to_s.strip
    topic_text = Array(read.call(:topics)).first(5).join(", ")
    scene_count = Array(read.call(:scenes)).length
    face_count = read.call(:face_count).to_i

    parts = []
    parts << "Detected visual signals: #{signals.join(', ')}." if signals.any?
    parts << "Detected scene transitions: #{scene_count}." if scene_count.positive?
    parts << "Detected faces: #{face_count}." if face_count.positive?
    parts << "OCR text: #{ocr}." if ocr.present?
    parts << "Audio transcript: #{transcript}." if transcript.present?
    parts << "Inferred topics: #{topic_text}." if topic_text.present?
    parts << "Story media context extracted from local AI pipeline." if parts.empty?
    parts.join(" ")
  end
  # Returns the first argument whose stripped string form is non-blank.
  #
  # @param values [Array<Object>] candidates, checked in order
  # @return [String, nil] the stripped text of the first usable candidate, or
  #   nil when every candidate is blank
  def first_present(*values)
    values.lazy.map { |candidate| candidate.to_s.strip }.find(&:present?)
  end
  # Flattens the given values into one list of distinct, stripped, non-blank
  # strings, preserving first-seen order.
  #
  # @param values [Array<Object>] scalars, arrays or nil; each goes through Array()
  # @return [Array<String>] at most 40 entries
  def merge_unique_values(*values)
    cleaned = values.flat_map { |entry| Array(entry) }.filter_map do |item|
      text = item.to_s.strip
      text unless text.blank?
    end
    cleaned.uniq.take(40)
  end
  # Returns a copy of the validated insights adjusted by the historical
  # comparison.
  #
  # A story labelled "owned_by_profile" that shows external usernames and has no
  # historical overlap is reclassified as third-party content with commenting
  # disallowed. The overlap flag is always recorded on the generation policy.
  #
  # @param validated_story_insights [Hash] symbol-keyed insights; deep-copied,
  #   never mutated
  # @param historical_comparison [Hash] read via :has_historical_overlap
  # @return [Hash] adjusted copy, or the original argument if anything raises
  def apply_historical_validation(validated_story_insights:, historical_comparison:)
    insights = validated_story_insights.is_a?(Hash) ? validated_story_insights.deep_dup : {}

    ownership = insights[:ownership_classification]
    ownership = {} unless ownership.is_a?(Hash)
    policy = insights[:generation_policy]
    policy = {} unless policy.is_a?(Hash)

    has_overlap = ActiveModel::Type::Boolean.new.cast(historical_comparison[:has_historical_overlap])
    external_usernames = Array(ownership[:detected_external_usernames]).map(&:to_s).reject(&:blank?)
    reclassify = ownership[:label].to_s == "owned_by_profile" && !has_overlap && external_usernames.any?

    if reclassify
      reason_code = "no_historical_overlap_with_external_usernames"
      ownership.merge!(
        label: "third_party_content",
        decision: "skip_comment",
        reason_codes: Array(ownership[:reason_codes]) + [ reason_code ],
        summary: "Detected external usernames without historical overlap; classified as third-party content."
      )
      policy.merge!(
        allow_comment: false,
        reason_code: reason_code,
        reason: ownership[:summary],
        classification: ownership[:label]
      )
    end

    policy[:historical_overlap] = has_overlap
    insights.merge!(ownership_classification: ownership, generation_policy: policy)
    insights
  rescue StandardError
    validated_story_insights
  end
  # Tells whether an intelligence payload carries no usable signal at all.
  #
  # @param payload [Hash, Object] symbol-keyed payload; anything that is not a
  #   Hash counts as blank
  # @return [Boolean] true when every text field is blank, every list is empty
  #   and :face_count is zero or less
  def local_story_intelligence_blank?(payload)
    return true unless payload.is_a?(Hash)

    text_keys = %i[ocr_text transcript]
    list_keys = %i[objects object_detections ocr_blocks scenes hashtags mentions profile_handles topics people]

    return false if text_keys.any? { |key| !payload[key].to_s.strip.blank? }
    return false if list_keys.any? { |key| !Array(payload[key]).empty? }

    payload[:face_count].to_i <= 0
  end
  # Runs live local extraction on the attached media, choosing the image or
  # video extractor from the blob's content type.
  #
  # @param story_id [String] identifier forwarded to the extractors
  # @return [Hash] the extractor's result; {} when the content type is blank,
  #   is neither image/* nor video/*, or when any StandardError is raised
  def extract_live_local_intelligence_from_event_media(story_id:)
    mime = media&.blob&.content_type.to_s
    return {} if mime.blank?

    if mime.start_with?("image/")
      return extract_local_intelligence_from_image_bytes(image_bytes: media.download, story_id: story_id)
    end

    if mime.start_with?("video/")
      return extract_local_intelligence_from_video_bytes(video_bytes: media.download, story_id: story_id, content_type: mime)
    end

    {}
  rescue StandardError
    {}
  end
  # Extracts story intelligence from a still image: face detection, content
  # understanding and person resolution.
  #
  # @param image_bytes [String] raw image data
  # @param story_id [String, Object] identifier forwarded to the services
  # @return [Hash] symbol-keyed intelligence with :source set to
  #   "live_local_vision_ocr"
  def extract_local_intelligence_from_image_bytes(image_bytes:, story_id:)
    detection = FaceDetectionService.new.detect(
      media_payload: { story_id: story_id.to_s, image_bytes: image_bytes }
    )
    understanding = StoryContentUnderstandingService.new.build(
      media_type: "image",
      detections: [detection],
      transcript_text: nil
    )
    detected_faces = Array(detection[:faces])
    people = resolve_people_from_faces(detected_faces: detected_faces, fallback_image_bytes: image_bytes, story_id: story_id)

    # Distinct non-blank strings for one understanding key, capped at +cap+.
    string_list = lambda do |key, cap|
      Array(understanding[key]).map(&:to_s).reject(&:blank?).uniq.first(cap)
    end

    {
      ocr_text: understanding[:ocr_text].to_s.presence,
      transcript: understanding[:transcript].to_s.presence,
      objects: string_list.call(:objects, 30),
      hashtags: string_list.call(:hashtags, 20),
      mentions: string_list.call(:mentions, 20),
      profile_handles: string_list.call(:profile_handles, 30),
      topics: string_list.call(:topics, 30),
      scenes: Array(understanding[:scenes]).first(80),
      ocr_blocks: Array(understanding[:ocr_blocks]).first(120),
      object_detections: normalize_object_detections(understanding[:object_detections], limit: 120),
      face_count: detected_faces.length,
      people: people,
      reason: detection.dig(:metadata, :reason).to_s.presence,
      source: "live_local_vision_ocr"
    }
  end
  # Extracts story intelligence from a video: per-frame face detection (first
  # 8 frames), audio transcription, microservice video analysis and person
  # resolution.
  #
  # The microservice analysis is optional: if it raises or returns anything
  # other than a Hash, it is treated as empty so the rest of the extraction
  # still produces a result.
  #
  # @param video_bytes [String] raw video data
  # @param story_id [String, Object] identifier forwarded to the services
  # @param content_type [String] MIME type of the video
  # @return [Hash] symbol-keyed intelligence with :source set to
  #   "live_local_video_vision_ocr_transcript"
  def extract_local_intelligence_from_video_bytes(video_bytes:, story_id:, content_type:)
    frame_result = VideoFrameExtractionService.new.extract(
      video_bytes: video_bytes,
      story_id: story_id.to_s,
      content_type: content_type.to_s
    )

    detections = []
    faces = []
    Array(frame_result[:frames]).first(8).each do |frame|
      detection = FaceDetectionService.new.detect(
        media_payload: { story_id: story_id.to_s, image_bytes: frame[:image_bytes] }
      )
      detections << detection
      # Keep the frame bytes with each face so it can be embedded later.
      Array(detection[:faces]).each { |face| faces << face.merge(image_bytes: frame[:image_bytes]) }
    end

    audio_result = VideoAudioExtractionService.new.extract(
      video_bytes: video_bytes,
      story_id: story_id.to_s,
      content_type: content_type.to_s
    )
    transcript = SpeechTranscriptionService.new.transcribe(
      audio_bytes: audio_result[:audio_bytes],
      story_id: story_id.to_s
    )

    video_intel =
      begin
        Ai::LocalMicroserviceClient.new.analyze_video_story_intelligence!(
          video_bytes: video_bytes,
          sample_rate: 2,
          usage_context: { workflow: "story_processing", story_id: story_id.to_s }
        )
      rescue StandardError
        {}
      end
    # A nil or non-Hash response would otherwise raise on the lookups below.
    video_intel = {} unless video_intel.is_a?(Hash)

    understanding = StoryContentUnderstandingService.new.build(
      media_type: "video",
      detections: detections,
      transcript_text: transcript[:transcript]
    )
    people = resolve_people_from_faces(
      detected_faces: faces,
      fallback_image_bytes: faces.first&.dig(:image_bytes),
      story_id: story_id
    )

    {
      ocr_text: understanding[:ocr_text].to_s.presence,
      transcript: understanding[:transcript].to_s.presence,
      objects: Array(understanding[:objects]).map(&:to_s).reject(&:blank?).uniq.first(40),
      hashtags: Array(understanding[:hashtags]).map(&:to_s).reject(&:blank?).uniq.first(25),
      mentions: Array(understanding[:mentions]).map(&:to_s).reject(&:blank?).uniq.first(25),
      profile_handles: Array(understanding[:profile_handles]).map(&:to_s).reject(&:blank?).uniq.first(40),
      topics: Array(understanding[:topics]).map(&:to_s).reject(&:blank?).uniq.first(40),
      scenes: normalize_hash_array(understanding[:scenes], video_intel["scenes"]).first(80),
      ocr_blocks: normalize_hash_array(understanding[:ocr_blocks], video_intel["ocr_blocks"]).first(120),
      object_detections: normalize_object_detections(understanding[:object_detections], video_intel["object_detections"], limit: 120),
      face_count: faces.length,
      people: people,
      reason: [ frame_result.dig(:metadata, :reason), audio_result.dig(:metadata, :reason), transcript.dig(:metadata, :reason) ]
        .map(&:to_s)
        .reject(&:blank?)
        .uniq
        .join(", ")
        .presence,
      source: "live_local_video_vision_ocr_transcript"
    }
  end
  # Matches up to five detected faces against known people for this event's
  # profile, creating or updating person records through VectorMatchingService.
  #
  # @param detected_faces [Array<Hash>] face rows; each may carry :image_bytes
  # @param fallback_image_bytes [String, nil] used when a face has no bytes
  # @param story_id [String, Object] identifier forwarded to the services
  # @return [Array<Hash>] one compacted summary per resolved face; [] when the
  #   profile or account is missing or when any StandardError is raised
  def resolve_people_from_faces(detected_faces:, fallback_image_bytes:, story_id:)
    profile = instagram_profile
    account = profile&.instagram_account
    return [] unless account && profile

    embedder = FaceEmbeddingService.new
    matcher = VectorMatchingService.new
    resolved = []

    Array(detected_faces).first(5).each do |face|
      image_bytes = face[:image_bytes].presence || fallback_image_bytes
      next if image_bytes.blank?

      signature = event_face_observation_signature(story_id: story_id, face: face)
      embedding = embedder.embed(
        media_payload: { story_id: story_id.to_s, media_type: "image", image_bytes: image_bytes },
        face: face
      )
      vector = Array(embedding[:vector]).map(&:to_f)
      next if vector.empty?

      match = matcher.match_or_create!(
        account: account,
        profile: profile,
        embedding: vector,
        occurred_at: occurred_at || detected_at || Time.current,
        observation_signature: signature
      )
      person = match[:person]
      update_person_face_attributes_for_event!(person: person, face: face)

      resolved << {
        person_id: person.id,
        role: match[:role].to_s,
        label: person.label.to_s.presence,
        similarity: match[:similarity],
        age: face[:age],
        age_range: face[:age_range],
        gender: face[:gender],
        gender_score: face[:gender_score].to_f
      }.compact
    end

    resolved
  rescue StandardError
    []
  end
  # Builds a colon-separated signature for one face observation on this event.
  #
  # Segments, in order: the literal "event", this record's id, the story id, the
  # frame index, the timestamp rounded to 3 decimals, then the bounding-box
  # corners x1, y1, x2, y2.
  # NOTE(review): corners are read with string keys only while the face itself
  # is read with symbol keys; confirm the bounding box really is string-keyed.
  #
  # @param story_id [String, Object]
  # @param face [Hash] symbol-keyed face row
  # @return [String]
  def event_face_observation_signature(story_id:, face:)
    box = face[:bounding_box]
    box = {} unless box.is_a?(Hash)
    corners = %w[x1 y1 x2 y2].map { |corner| box[corner] }

    segments = [
      "event",
      id,
      story_id.to_s,
      face[:frame_index].to_i,
      face[:timestamp_seconds].to_f.round(3),
      *corners
    ]
    segments.map(&:to_s).join(":")
  end
  # Folds one face observation into the person's stored "face_attributes":
  # gender and age-range tallies with their most frequent value, a rolling
  # window of age samples with its mean, and a last-observed timestamp.
  #
  # The age window keeps the 20 most recent samples. The 19 newest stored
  # samples are retained before appending, so old readings age out instead of
  # pinning the estimate forever.
  #
  # @param person [#metadata, #update_columns, nil] record to update; nil is a no-op
  # @param face [Hash] symbol-keyed face row (:gender, :age_range, :age)
  # @return [Object, nil] result of update_columns; nil when person is nil or
  #   when any StandardError is raised (best-effort update)
  def update_person_face_attributes_for_event!(person:, face:)
    return unless person

    # Named person_meta so it does not shadow this record's own metadata.
    person_meta = person.metadata.is_a?(Hash) ? person.metadata.deep_dup : {}
    attrs = person_meta["face_attributes"].is_a?(Hash) ? person_meta["face_attributes"].deep_dup : {}

    gender = face[:gender].to_s.strip.downcase
    if gender.present?
      gender_counts = attrs["gender_counts"].is_a?(Hash) ? attrs["gender_counts"].deep_dup : {}
      gender_counts[gender] = gender_counts[gender].to_i + 1
      attrs["gender_counts"] = gender_counts
      attrs["primary_gender_cue"] = gender_counts.max_by { |_key, count| count.to_i }&.first
    end

    age_range = face[:age_range].to_s.strip
    if age_range.present?
      age_counts = attrs["age_range_counts"].is_a?(Hash) ? attrs["age_range_counts"].deep_dup : {}
      age_counts[age_range] = age_counts[age_range].to_i + 1
      attrs["age_range_counts"] = age_counts
      attrs["primary_age_range"] = age_counts.max_by { |_key, count| count.to_i }&.first
    end

    age_value = face[:age].to_f
    if age_value.positive?
      samples = Array(attrs["age_samples"]).map(&:to_f).last(19)
      samples << age_value.round(1)
      attrs["age_samples"] = samples
      attrs["age_estimate"] = (samples.sum / samples.length.to_f).round(1)
    end

    attrs["last_observed_at"] = Time.current.iso8601
    person_meta["face_attributes"] = attrs
    person.update_columns(metadata: person_meta, updated_at: Time.current)
  rescue StandardError
    nil
  end
  # Summarises the stored intelligence of the profile's 18 most recent story
  # archive events, dropping events that carry no signal.
  #
  # Text excerpts are cut to 220 bytes and then scrubbed, so a cut that lands
  # inside a multibyte character cannot leave invalid UTF-8 in the result.
  #
  # @param profile [#instagram_profile_events, nil]
  # @return [Array<Hash>] symbol-keyed rows, newest first; [] when profile is
  #   nil or when any StandardError is raised
  def recent_story_intelligence_context(profile)
    return [] unless profile

    profile.instagram_profile_events
      .where(kind: STORY_ARCHIVE_EVENT_KINDS)
      .order(detected_at: :desc, id: :desc)
      .limit(18)
      .map do |event|
        meta = event.metadata.is_a?(Hash) ? event.metadata : {}
        intel = meta["local_story_intelligence"].is_a?(Hash) ? meta["local_story_intelligence"] : {}

        # The nested snapshot is listed before the flat metadata copies.
        objects = merge_unique_values(intel["objects"], meta["content_signals"]).first(8)
        hashtags = merge_unique_values(intel["hashtags"], meta["hashtags"]).first(8)
        mentions = merge_unique_values(intel["mentions"], meta["mentions"]).first(6)
        profile_handles = merge_unique_values(intel["profile_handles"], meta["profile_handles"]).first(8)
        topics = merge_unique_values(intel["topics"], meta["topics"]).first(8)
        ocr_text = first_present(intel["ocr_text"], meta["ocr_text"])
        transcript = first_present(intel["transcript"], meta["transcript"])
        scenes = normalize_hash_array(intel["scenes"], meta["scenes"]).first(20)
        people = Array(intel["people"] || meta["face_people"]).first(10)
        face_count = (intel["face_count"] || meta["face_count"]).to_i

        next if objects.empty? && hashtags.empty? && mentions.empty? && profile_handles.empty? && topics.empty? && scenes.empty? && ocr_text.blank? && transcript.blank? && face_count <= 0

        {
          event_id: event.id,
          occurred_at: event.occurred_at&.iso8601 || event.detected_at&.iso8601,
          topics: topics,
          objects: objects,
          scenes: scenes,
          hashtags: hashtags,
          mentions: mentions,
          profile_handles: profile_handles,
          ocr_text: ocr_text.to_s.byteslice(0, 220).scrub(""),
          transcript: transcript.to_s.byteslice(0, 220).scrub(""),
          face_count: face_count,
          scenes_count: scenes.length,
          people: people
        }
      end.compact
  rescue StandardError
    []
  end
  # Renders up to ten story-intelligence rows as a bulleted text block.
  #
  # @param rows [Array<Hash>, nil] symbol-keyed rows
  # @return [String] a header line followed by one "- a | b" line per row, or
  #   "" when there are no rows
  def format_story_intelligence_context(rows)
    selected = Array(rows).take(10)
    return "" if selected.empty?

    list_labels = { topics: "topics", objects: "objects", hashtags: "hashtags", mentions: "mentions", profile_handles: "handles" }
    count_labels = { face_count: "faces", scenes_count: "scenes" }
    text_labels = { ocr_text: "ocr", transcript: "transcript" }

    bullet_lines = selected.map do |row|
      segments = []
      list_labels.each do |key, label|
        list = Array(row[key])
        segments << "#{label}=#{list.join(',')}" if list.any?
      end
      count_labels.each do |key, label|
        count = row[key].to_i
        segments << "#{label}=#{count}" if count.positive?
      end
      text_labels.each do |key, label|
        segments << "#{label}=#{row[key]}" if row[key].to_s.present?
      end
      "- #{segments.join(' | ')}"
    end

    "Recent structured story intelligence:\n#{bullet_lines.join("\n")}"
  end
  # Assembles a compact historical context for a profile: its history
  # narrative, recent story intelligence, recent analyzed posts and the
  # profile-preparation summary, joined by newlines.
  #
  # The result is capped at 650 bytes and then scrubbed, so a cut inside a
  # multibyte character cannot leave invalid UTF-8 in the result.
  #
  # @param profile [#history_narrative_text, nil] narrative is skipped when nil
  # @param historical_story_context [Array<Hash>] rows for format_story_intelligence_context
  # @param verified_profile_history [Array<Hash>] rows for format_verified_profile_history
  # @param profile_preparation [Hash] payload for format_profile_preparation
  # @return [String] valid UTF-8, at most 650 bytes
  def build_compact_historical_context(profile:, historical_story_context:, verified_profile_history:, profile_preparation:)
    summary = []
    summary << profile.history_narrative_text(max_chunks: 2).to_s if profile
    summary << format_story_intelligence_context(historical_story_context).to_s
    summary << format_verified_profile_history(verified_profile_history)
    summary << format_profile_preparation(profile_preparation)

    compact = summary
      .map(&:to_s)
      .map(&:strip)
      .reject(&:blank?)
      .join("\n")
    compact.byteslice(0, 650).scrub("")
  end
  # Reads the "comment_generation_preparation" payload from the profile's
  # behavior-profile metadata.
  #
  # @param profile [#instagram_profile_behavior_profile, nil]
  # @return [Hash] the payload with deeply symbolized keys; {} when the profile,
  #   metadata or payload is missing or not a Hash, or when a StandardError is
  #   raised
  def latest_profile_comment_preparation(profile)
    behavior_meta = profile&.instagram_profile_behavior_profile&.metadata
    return {} unless behavior_meta.is_a?(Hash)

    preparation = behavior_meta["comment_generation_preparation"]
    return {} unless preparation.is_a?(Hash)

    preparation.deep_symbolize_keys
  rescue StandardError
    {}
  end
  # Summarises the profile's 12 most recent posts that have an analysis or at
  # least one detected face.
  #
  # Caption and image description are cut to 220 bytes and then scrubbed, so a
  # cut inside a multibyte character cannot leave invalid UTF-8 in the result.
  #
  # @param profile [#instagram_profile_posts, nil]
  # @return [Array<Hash>] symbol-keyed rows; [] when profile is nil or when any
  #   StandardError is raised
  def recent_analyzed_profile_history(profile)
    return [] unless profile

    profile.instagram_profile_posts
      .recent_first
      .limit(12)
      .map do |post|
        analysis = post.analysis.is_a?(Hash) ? post.analysis : {}
        faces = post.instagram_post_faces
        next if analysis.blank? && !faces.exists?

        {
          post_id: post.id,
          shortcode: post.shortcode,
          taken_at: post.taken_at&.iso8601,
          caption: post.caption.to_s.byteslice(0, 220).scrub(""),
          image_description: analysis["image_description"].to_s.byteslice(0, 220).scrub(""),
          topics: Array(analysis["topics"]).map(&:to_s).reject(&:blank?).uniq.first(8),
          objects: Array(analysis["objects"]).map(&:to_s).reject(&:blank?).uniq.first(8),
          hashtags: Array(analysis["hashtags"]).map(&:to_s).reject(&:blank?).uniq.first(8),
          mentions: Array(analysis["mentions"]).map(&:to_s).reject(&:blank?).uniq.first(8),
          face_count: faces.count,
          primary_face_count: faces.where(role: "primary_user").count,
          secondary_face_count: faces.where(role: "secondary_person").count
        }
      end.compact
  rescue StandardError
    []
  end
  # Builds a voice/persona summary for comment generation from the profile's
  # behavior summary, recent post and story topics, prior generated comments
  # and the preparation payload.
  #
  # Prior comment examples are cut to 120 bytes and then scrubbed, so a cut
  # inside a multibyte character cannot leave invalid UTF-8 in the result.
  #
  # @param profile [Object, nil] profile record; may be nil
  # @param historical_story_context [Array<Hash>] rows read via :topics
  # @param verified_profile_history [Array<Hash>] rows read via :topics
  # @param profile_preparation [Hash] symbol- or string-keyed preparation payload
  # @return [Hash] symbol-keyed summary with nil entries removed; {} when any
  #   StandardError is raised
  def build_conversational_voice_profile(profile:, historical_story_context:, verified_profile_history:, profile_preparation:)
    behavior_summary = profile&.instagram_profile_behavior_profile&.behavioral_summary
    behavior_summary = {} unless behavior_summary.is_a?(Hash)
    preparation = profile_preparation.is_a?(Hash) ? profile_preparation : {}

    recent_comments = recent_llm_comments_for_profile(profile).first(6)
    recent_topics = Array(verified_profile_history).flat_map { |row| Array(row[:topics]) }.map(&:to_s).reject(&:blank?).uniq.first(10)
    recurring_story_topics = Array(historical_story_context).flat_map { |row| Array(row[:topics]) }.map(&:to_s).reject(&:blank?).uniq.first(10)

    {
      author_type: determine_author_type(profile),
      profile_tags: profile ? profile.profile_tags.pluck(:name).sort.first(10) : [],
      bio_keywords: extract_topics_from_profile(profile).first(10),
      recurring_topics: (recent_topics + recurring_story_topics + Array(behavior_summary["topic_clusters"]).map(&:first)).map(&:to_s).reject(&:blank?).uniq.first(12),
      recurring_hashtags: Array(behavior_summary["top_hashtags"]).map(&:first).map(&:to_s).reject(&:blank?).first(10),
      frequent_people_labels: Array(behavior_summary["frequent_secondary_persons"]).map { |row| row.is_a?(Hash) ? row["label"] || row[:label] : nil }.map(&:to_s).reject(&:blank?).uniq.first(8),
      prior_comment_examples: recent_comments.map { |value| value.to_s.byteslice(0, 120).scrub("") },
      identity_consistency: preparation[:identity_consistency].is_a?(Hash) ? preparation[:identity_consistency] : preparation["identity_consistency"],
      profile_preparation_reason: preparation[:reason].to_s.presence || preparation["reason"].to_s.presence
    }.compact
  rescue StandardError
    {}
  end
  # Renders up to eight analyzed-post rows as a bulleted text block.
  #
  # @param rows [Array<Hash>, nil] symbol-keyed rows
  # @return [String] a header line followed by one "- a | b" line per row, or
  #   "" when there are no rows
  def format_verified_profile_history(rows)
    posts = Array(rows).take(8)
    return "" if posts.empty?

    list_labels = { topics: "topics", objects: "objects", hashtags: "hashtags", mentions: "mentions" }
    count_labels = { face_count: "faces", primary_face_count: "primary_faces", secondary_face_count: "secondary_faces" }

    bullet_lines = posts.map do |post|
      segments = []
      segments << "shortcode=#{post[:shortcode]}" if post[:shortcode].to_s.present?
      list_labels.each do |key, label|
        list = Array(post[key])
        segments << "#{label}=#{list.join(',')}" if list.any?
      end
      count_labels.each do |key, label|
        count = post[key].to_i
        segments << "#{label}=#{count}" if count.positive?
      end
      segments << "desc=#{post[:image_description]}" if post[:image_description].to_s.present?
      "- #{segments.join(' | ')}"
    end

    "Recent analyzed profile posts:\n#{bullet_lines.join("\n")}"
  end
  # Renders a profile-preparation payload as a single "Profile preparation: ..."
  # line of "key=value" segments joined by " | ".
  #
  # Keys are looked up by symbol first and by string second. Boolean fields
  # use a nil-aware lookup so that an explicit `false` stored under the symbol
  # key is rendered as "false" instead of falling through to the (missing)
  # string key and rendering as an empty value.
  #
  # @param payload [Hash, nil] preparation payload; non-Hash input is treated
  #   as empty
  # @return [String] "" for a blank payload, otherwise the formatted line
  def format_profile_preparation(payload)
    data = payload.is_a?(Hash) ? payload : {}
    return "" if data.blank?

    # `hash[sym] || hash[str]` would discard a legitimate `false`.
    read_flag = lambda do |hash, key|
      value = hash[key]
      value.nil? ? hash[key.to_s] : value
    end
    boolean = ActiveModel::Type::Boolean.new

    identity = data[:identity_consistency].is_a?(Hash) ? data[:identity_consistency] : data["identity_consistency"]
    analysis = data[:analysis].is_a?(Hash) ? data[:analysis] : data["analysis"]

    parts = [
      "ready=#{boolean.cast(read_flag.call(data, :ready_for_comment_generation))}",
      "reason=#{data[:reason_code] || data["reason_code"]}"
    ]

    if analysis.is_a?(Hash)
      parts << "analyzed_posts=#{analysis[:analyzed_posts_count] || analysis["analyzed_posts_count"]}"
      parts << "structured_posts=#{analysis[:posts_with_structured_signals_count] || analysis["posts_with_structured_signals_count"]}"
    end

    if identity.is_a?(Hash)
      parts << "identity_consistent=#{boolean.cast(read_flag.call(identity, :consistent))}"
      parts << "identity_ratio=#{identity[:dominance_ratio] || identity["dominance_ratio"]}"
      parts << "identity_reason=#{identity[:reason_code] || identity["reason_code"]}"
    end

    # `parts` always holds at least the ready/reason segments here.
    "Profile preparation: #{parts.join(' | ')}"
  end
  # Collects the timestamps describing this record's story timeline.
  #
  # Values stored in `metadata` win; otherwise the most recent associated
  # story (by taken_at, then id) and this record's own timestamps are used.
  #
  # @return [Hash] :story_posted_at, :downloaded_to_system_at and
  #   :event_detected_at (each may be nil)
  def story_timeline_data
    meta = metadata.is_a?(Hash) ? metadata : {}
    latest_story = instagram_stories.order(taken_at: :desc, id: :desc).first

    {
      story_posted_at: meta["upload_time"].presence || meta["taken_at"].presence || latest_story&.taken_at&.iso8601,
      downloaded_to_system_at: meta["downloaded_at"].presence || occurred_at&.iso8601 || created_at&.iso8601,
      event_detected_at: detected_at&.iso8601
    }
  end
  # Estimates how long comment generation will take, in seconds.
  #
  # Starts from an 18 second baseline and adds a share for the size of the
  # "ai" Sidekiq queue, 6 seconds per previous attempt and the local
  # preprocessing penalty. The result is clamped to 10..240.
  #
  # @param queue_state [Object] when truthy every queued job counts 4 seconds;
  #   otherwise every job beyond the first counts 3 seconds
  # @return [Integer] estimated seconds, within 10..240
  def estimated_generation_seconds(queue_state:)
    # Loaded lazily; any StandardError while inspecting the queue counts as empty.
    pending_jobs =
      begin
        require "sidekiq/api"
        Sidekiq::Queue.new("ai").size.to_i
      rescue StandardError
        0
      end

    queue_seconds =
      if queue_state
        pending_jobs * 4
      else
        [pending_jobs - 1, 0].max * 3
      end
    retry_seconds = llm_comment_attempts.to_i * 6

    total = 18 + queue_seconds + retry_seconds + local_context_preprocess_penalty
    total.clamp(10, 240)
  end
  # Extra seconds to budget when no local context has been extracted yet.
  #
  # Returns 0 as soon as metadata already carries story intelligence, OCR text
  # or content signals. Otherwise the penalty depends on the media content
  # type: 16 for "image/*", 8 for anything else. Any StandardError yields 0.
  #
  # @return [Integer] 0, 8 or 16
  def local_context_preprocess_penalty
    meta = metadata.is_a?(Hash) ? metadata : {}

    return 0 if meta["local_story_intelligence"].is_a?(Hash)
    return 0 if meta["ocr_text"].to_s.present?
    return 0 if Array(meta["content_signals"]).any?

    content_type = media&.blob&.content_type.to_s.presence || meta["media_content_type"].to_s
    content_type.start_with?("image/") ? 16 : 8
  rescue StandardError
    0
  end
  # Returns the most recent generated comments stored on the profile's other
  # events (this record is excluded), newest first, at most 12.
  #
  # @param profile [#instagram_profile_events, nil]
  # @return [Array<String>] non-blank comments; [] when profile is nil/false
  #   or when any StandardError occurs
  def recent_llm_comments_for_profile(profile)
    return [] unless profile

    with_comment = profile.instagram_profile_events
                          .where.not(id: id)
                          .where.not(llm_generated_comment: [nil, ""])
    newest = with_comment.order(llm_comment_generated_at: :desc, id: :desc).limit(12)

    newest.pluck(:llm_generated_comment).map(&:to_s).reject(&:blank?)
  rescue StandardError
    []
  end
  # Builds a size-capped evidence hash from local story intelligence.
  #
  # Only symbol keys of the payload are read. List fields are truncated
  # (20 entries for most, 10 for profile ids and people, 30 for the
  # hash-only detection/OCR block lists).
  #
  # @param local_story_intelligence [Hash, nil] non-Hash input is treated as {}
  # @return [Hash] evidence keyed by symbol
  def build_cv_ocr_evidence(local_story_intelligence:)
    intel = local_story_intelligence.is_a?(Hash) ? local_story_intelligence : {}
    capped = ->(key, max) { Array(intel[key]).first(max) }
    hash_rows = ->(key) { normalize_hash_array(intel[key]).first(30) }

    {
      source: intel[:source].to_s,
      reason: intel[:reason].to_s.presence,
      ocr_text: intel[:ocr_text].to_s,
      transcript: intel[:transcript].to_s,
      objects: capped.call(:objects, 20),
      scenes: capped.call(:scenes, 20),
      hashtags: capped.call(:hashtags, 20),
      mentions: capped.call(:mentions, 20),
      profile_handles: capped.call(:profile_handles, 20),
      source_account_reference: intel[:source_account_reference].to_s,
      source_profile_ids: capped.call(:source_profile_ids, 10),
      media_type: intel[:media_type].to_s,
      face_count: intel[:face_count].to_i,
      people: capped.call(:people, 10),
      object_detections: hash_rows.call(:object_detections),
      ocr_blocks: hash_rows.call(:ocr_blocks)
    }
  end
  # Compares the signals of the current story with those seen in history.
  #
  # The current payload is read with symbol keys; historical rows accept
  # symbol or string keys. Scene entries may be hashes (their "type" is used)
  # or plain values; people entries contribute their person_id.
  #
  # @param current [Hash, nil] signals of the current story
  # @param historical_story_context [Array<Hash>, nil] earlier story rows
  # @return [Hash] shared/novel topics, objects and scenes, recurring
  #   hashtags, mentions, profile handles and people ids (each capped at 12),
  #   plus :has_historical_overlap, which considers topics, objects, scenes,
  #   hashtags and profile handles only
  def build_historical_comparison(current:, historical_story_context:)
    now = current.is_a?(Hash) ? current : {}
    past_rows = Array(historical_story_context)

    tidy = ->(values) { values.map(&:to_s).reject(&:blank?).uniq }
    scene_name = ->(entry) { entry.is_a?(Hash) ? entry[:type] || entry["type"] : entry }
    person_ref = ->(entry) { entry.is_a?(Hash) ? entry[:person_id] || entry["person_id"] : nil }
    past_values = ->(key) { past_rows.flat_map { |row| Array(row[key] || row[key.to_s]) } }

    now_topics = tidy.call(Array(now[:topics]))
    now_objects = tidy.call(Array(now[:objects]))
    now_scenes = tidy.call(Array(now[:scenes]).map { |entry| scene_name.call(entry) })
    now_hashtags = tidy.call(Array(now[:hashtags]))
    now_mentions = tidy.call(Array(now[:mentions]))
    now_handles = tidy.call(Array(now[:profile_handles]))
    now_people = Array(now[:people]).map { |entry| person_ref.call(entry) }.compact.map(&:to_s)

    past_topics = tidy.call(past_values.call(:topics))
    past_objects = tidy.call(past_values.call(:objects))
    past_scenes = tidy.call(past_values.call(:scenes).map { |entry| scene_name.call(entry) })
    past_hashtags = tidy.call(past_values.call(:hashtags))
    past_mentions = tidy.call(past_values.call(:mentions))
    past_handles = tidy.call(past_values.call(:profile_handles))
    past_people = past_values.call(:people).map { |entry| person_ref.call(entry) }.compact.map(&:to_s).uniq

    topic_overlap = now_topics & past_topics
    object_overlap = now_objects & past_objects
    scene_overlap = now_scenes & past_scenes
    hashtag_overlap = now_hashtags & past_hashtags
    handle_overlap = now_handles & past_handles

    {
      shared_topics: topic_overlap.first(12),
      novel_topics: (now_topics - past_topics).first(12),
      shared_objects: object_overlap.first(12),
      novel_objects: (now_objects - past_objects).first(12),
      shared_scenes: scene_overlap.first(12),
      novel_scenes: (now_scenes - past_scenes).first(12),
      recurring_hashtags: hashtag_overlap.first(12),
      recurring_mentions: (now_mentions & past_mentions).first(12),
      recurring_profile_handles: handle_overlap.first(12),
      recurring_people_ids: (now_people & past_people).first(12),
      has_historical_overlap: [topic_overlap, object_overlap, scene_overlap, hashtag_overlap, handle_overlap].any?(&:any?)
    }
  end
  # Flattens the given values one level and keeps only Hash entries.
  #
  # Each argument goes through Array(), so nil contributes nothing. A bare
  # Hash argument is expanded by Array() into key/value pairs and therefore
  # also contributes nothing.
  #
  # @param values [Array] arrays (or single values) to scan
  # @return [Array<Hash>] the Hash entries in their original order
  def normalize_hash_array(*values)
    values.each_with_object([]) do |value, hashes|
      Array(value).each { |entry| hashes << entry if entry.is_a?(Hash) }
    end
  end
  # Normalizes person rows coming from one or more sources.
  #
  # Non-Hash entries are ignored. Every attribute is read by symbol key first
  # and string key second; blank strings and a non-positive age become nil and
  # are removed from the row. Rows are de-duplicated on person id, role,
  # similarity (rounded to 3 decimals) and label.
  #
  # @param values [Array] arrays (or single values) of person rows
  # @return [Array<Hash>] normalized, de-duplicated rows
  def normalize_people_rows(*values)
    people = values.flat_map { |value| Array(value) }.grep(Hash).map do |row|
      attr = ->(key) { row[key] || row[key.to_s] }
      text = ->(key) { attr.call(key).to_s.presence }
      age = attr.call(:age).to_f

      {
        person_id: attr.call(:person_id),
        role: text.call(:role),
        label: text.call(:label),
        similarity: (attr.call(:similarity) || attr.call(:match_similarity)).to_f,
        relationship: text.call(:relationship),
        appearances: attr.call(:appearances).to_i,
        linked_usernames: Array(attr.call(:linked_usernames)).map(&:to_s).reject(&:blank?).first(8),
        age: age.positive? ? age.round(1) : nil,
        age_range: text.call(:age_range),
        gender: text.call(:gender),
        gender_score: attr.call(:gender_score).to_f
      }.compact
    end

    people.uniq do |person|
      [ person[:person_id], person[:role], person[:similarity].to_f.round(3), person[:label] ]
    end
  end
  # Normalizes object-detection rows into label/confidence/bbox/timestamps.
  #
  # The label falls back to the description and is lower-cased and stripped;
  # rows without one are dropped. Confidence falls back to score and then
  # max_confidence. Rows are de-duplicated on label, bbox and the first six
  # timestamps, sorted by descending confidence and truncated.
  #
  # @param values [Array] arrays (or single values) of detection rows
  # @param limit [Integer] maximum rows to return, clamped to 1..300
  # @return [Array<Hash>] normalized detections
  def normalize_object_detections(*values, limit: 120)
    detections = normalize_hash_array(*values).filter_map do |row|
      name = (row[:label] || row["label"] || row[:description] || row["description"]).to_s.downcase.strip
      next if name.blank?

      box = [row[:bbox], row["bbox"]].find { |candidate| candidate.is_a?(Hash) } || {}
      score = row[:confidence] || row["confidence"] || row[:score] || row["score"] || row[:max_confidence] || row["max_confidence"]

      {
        label: name,
        confidence: score.to_f,
        bbox: box,
        timestamps: Array(row[:timestamps] || row["timestamps"]).map(&:to_f).first(80)
      }
    end

    max_rows = limit.to_i.clamp(1, 300)
    distinct = detections.uniq { |row| [ row[:label], row[:bbox], row[:timestamps].first(6) ] }
    distinct.sort_by { |row| -row[:confidence].to_f }.first(max_rows)
  end
  # Decides whether a story should be left out of the narrative.
  #
  # A story is excluded when its ownership label marks it as reshared,
  # third-party, unrelated or meme content, or when the policy disallows
  # commenting and its reason code mentions one of those categories.
  #
  # @param ownership [Hash, nil] expects a "label" (symbol or string key)
  # @param policy [Hash, nil] expects "allow_comment" and "reason_code"
  # @return [Boolean]
  def story_excluded_from_narrative?(ownership:, policy:)
    owner = ownership.is_a?(Hash) ? ownership : {}
    rules = policy.is_a?(Hash) ? policy : {}

    case (owner[:label] || owner["label"]).to_s
    when "reshare", "third_party_content", "unrelated_post", "meme_reshare"
      return true
    end

    # key? keeps an explicit `false` under the symbol key from being skipped.
    raw_allow = rules.key?(:allow_comment) ? rules[:allow_comment] : rules["allow_comment"]
    return false if ActiveModel::Type::Boolean.new.cast(raw_allow)

    (rules[:reason_code] || rules["reason_code"]).to_s.match?(/(reshare|third_party|unrelated|meme)/)
  end
  # Resolves the account a story refers to.
  #
  # An explicit "story_ref" (from raw, then story_meta) wins, with one
  # trailing ":" removed. Otherwise the handle is parsed from the first
  # available URL: "story_url" or "permalink" from raw, then "story_url" from
  # story_meta.
  #
  # Reserved Instagram path segments are never returned as a handle. Without
  # this check a post permalink such as ".../p/<shortcode>/" would yield "p",
  # and a highlight URL ".../stories/highlights/<id>/" would yield
  # "highlights".
  #
  # @param raw [Hash] event metadata (string keys)
  # @param story_meta [Hash] story metadata (string keys)
  # @return [String, nil] the reference, or nil when none can be derived
  def extract_source_account_reference(raw:, story_meta:)
    value = raw["story_ref"].to_s.presence || story_meta["story_ref"].to_s.presence
    value = value.delete_suffix(":") if value.to_s.present?
    return value if value.to_s.present?

    url = raw["story_url"].to_s.presence || raw["permalink"].to_s.presence || story_meta["story_url"].to_s.presence
    return nil if url.blank?

    match = url.match(%r{instagram\.com/stories/([a-zA-Z0-9._]+)/?}i) || url.match(%r{instagram\.com/([a-zA-Z0-9._]+)/?}i)
    return nil unless match

    handle = match[1].to_s.downcase
    reserved_segments = %w[p reel reels tv stories explore highlights]
    reserved_segments.include?(handle) ? nil : handle
  end
  # Collects candidate numeric profile ids from metadata.
  #
  # Looks at a fixed list of id keys (raw first, story_meta second) and keeps
  # values made up of digits only, then adds every standalone run of five or
  # more digits found in the story id.
  #
  # @param raw [Hash] event metadata (string keys)
  # @param story_meta [Hash] story metadata (string keys)
  # @return [Array<String>] up to 10 unique ids, in discovery order
  def extract_source_profile_ids_from_metadata(raw:, story_meta:)
    id_keys = %w[source_profile_id owner_id profile_id user_id source_user_id]

    explicit_ids = id_keys.filter_map do |key|
      candidate = (raw[key] || story_meta[key]).to_s
      candidate if candidate.match?(/\A\d+\z/)
    end

    story_ref = raw["story_id"].to_s.presence || story_meta["story_id"].to_s
    embedded_ids = story_ref.to_s.scan(/(?<!\w)\d{5,}(?!\w)/)

    (explicit_ids + embedded_ids).uniq.first(10)
  end
  # Classifies a profile from keywords in its bio.
  #
  # Matching is a case-insensitive substring check; creator keywords take
  # precedence over business keywords.
  #
  # @param profile [#bio, nil]
  # @return [String] "unknown" without a profile, otherwise "creator",
  #   "business" or "personal"
  def determine_author_type(profile)
    return "unknown" unless profile

    bio_text = profile.bio.to_s.downcase
    mentions = ->(*words) { words.any? { |word| bio_text.include?(word) } }

    return "creator" if mentions.call("creator", "artist")
    return "business" if mentions.call("business", "entrepreneur")

    "personal"
  end
  # Derives coarse topics from keywords found in a profile's bio.
  #
  # Matching is a case-insensitive substring check, so a keyword also matches
  # inside longer words.
  #
  # @param profile [#bio, nil]
  # @return [Array<String>] matching topics in table order; [] when the
  #   profile or its bio is missing
  def extract_topics_from_profile(profile)
    return [] unless profile&.bio

    bio_text = profile.bio.downcase
    keyword_table = {
      "fitness" => %w[fitness gym workout health],
      "food" => %w[food cooking chef recipe],
      "travel" => %w[travel wanderlust adventure],
      "fashion" => %w[fashion style outfit beauty],
      "tech" => %w[tech technology coding software],
      "art" => %w[art artist creative design],
      "business" => %w[business entrepreneur startup],
      "photography" => %w[photography photo camera]
    }

    matched = keyword_table.filter_map do |topic, words|
      topic if words.any? { |word| bio_text.include?(word) }
    end
    matched.uniq
  end
  1486. end

app/models/instagram_profile_history_chunk.rb

0.0% lines covered

100.0% branches covered

9 relevant lines. 0 lines covered and 9 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Record tied to one Instagram account and one Instagram profile. It must
  # have a sequence, and its word and entry counts must be numbers >= 0.
  class InstagramProfileHistoryChunk < ApplicationRecord
    belongs_to(:instagram_account)
    belongs_to(:instagram_profile)

    validates(:sequence, presence: true)
    validates(:word_count, numericality: { greater_than_or_equal_to: 0 })
    validates(:entry_count, numericality: { greater_than_or_equal_to: 0 })

    # Ascending by sequence, ties broken by id.
    scope(:ordered, lambda { order(:sequence, :id) })
    # Descending by sequence, ties broken by id.
    scope(:recent_first, lambda { order(sequence: :desc, id: :desc) })
  end

app/models/instagram_profile_insight.rb

0.0% lines covered

100.0% branches covered

9 relevant lines. 0 lines covered and 9 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Insight record linked to an account, a profile and the AI analysis it
  # belongs to. Its message strategy and signal evidences are destroyed
  # together with it.
  class InstagramProfileInsight < ApplicationRecord
    belongs_to(:instagram_account)
    belongs_to(:instagram_profile)
    belongs_to(:ai_analysis)

    has_one(:instagram_profile_message_strategy, dependent: :destroy)
    has_many(:instagram_profile_signal_evidences, dependent: :destroy)

    validates(:last_refreshed_at, presence: true)

    # Newest first by creation time, ties broken by id.
    scope(:recent_first, lambda { order(created_at: :desc, id: :desc) })
  end

app/models/instagram_profile_message_strategy.rb

0.0% lines covered

100.0% branches covered

7 relevant lines. 0 lines covered and 7 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Message strategy record linked to an account, a profile, an AI analysis
  # and the profile insight it belongs to.
  class InstagramProfileMessageStrategy < ApplicationRecord
    belongs_to(:instagram_account)
    belongs_to(:instagram_profile)
    belongs_to(:ai_analysis)
    belongs_to(:instagram_profile_insight)

    # Newest first by creation time, ties broken by id.
    scope(:recent_first, lambda { order(created_at: :desc, id: :desc) })
  end

app/models/instagram_profile_post.rb

0.0% lines covered

100.0% branches covered

18 relevant lines. 0 lines covered and 18 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Post captured for an Instagram profile, with attached media and preview
  # image, comments, detected faces and polymorphic AI analyses.
  class InstagramProfilePost < ApplicationRecord
    belongs_to(:instagram_account)
    belongs_to(:instagram_profile)

    has_many(:instagram_profile_post_comments, dependent: :destroy)
    has_many(:instagram_post_faces, dependent: :destroy)
    has_many(:ai_analyses, as: :analyzable, dependent: :destroy)

    has_one_attached(:media)
    has_one_attached(:preview_image)

    validates(:shortcode, presence: true)

    # Newest first by taken_at, ties broken by id.
    scope(:recent_first, lambda { order(taken_at: :desc, id: :desc) })
    # Posts whose ai_status is "pending".
    scope(:pending_ai, lambda { where(ai_status: "pending") })

    # @return [String] the stored permalink, or a "/p/<shortcode>/" URL built
    #   from the client's base URL when none is stored
    def permalink_url
      stored = permalink.presence
      return stored if stored

      "#{Instagram::Client::INSTAGRAM_BASE_URL}/p/#{shortcode}/"
    end

    # @return [Object, nil] the first analysis with purpose "post" in
    #   recent_first order, if any
    def latest_analysis
      post_analyses = ai_analyses.where(purpose: "post")
      post_analyses.recent_first.first
    end
  end

app/models/instagram_profile_post_comment.rb

0.0% lines covered

100.0% branches covered

6 relevant lines. 0 lines covered and 6 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Comment on a profile post; it belongs to the post and to a profile and
  # must have a body.
  class InstagramProfilePostComment < ApplicationRecord
    belongs_to(:instagram_profile_post)
    belongs_to(:instagram_profile)

    validates(:body, presence: true)

    # Newest first by commented_at, ties broken by id.
    scope(:recent_first, lambda { order(commented_at: :desc, id: :desc) })
  end

app/models/instagram_profile_signal_evidence.rb

0.0% lines covered

100.0% branches covered

8 relevant lines. 0 lines covered and 8 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Evidence row linked to an account, a profile, an AI analysis and a profile
  # insight. A signal type is required.
  class InstagramProfileSignalEvidence < ApplicationRecord
    belongs_to(:instagram_account)
    belongs_to(:instagram_profile)
    belongs_to(:ai_analysis)
    belongs_to(:instagram_profile_insight)

    validates(:signal_type, presence: true)

    # Newest first by creation time, ties broken by id.
    scope(:recent_first, lambda { order(created_at: :desc, id: :desc) })
  end

app/models/instagram_profile_tagging.rb

0.0% lines covered

100.0% branches covered

5 relevant lines. 0 lines covered and 5 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Join record between a profile and a tag; a given profile can be linked to
  # a given tag only once.
  class InstagramProfileTagging < ApplicationRecord
    belongs_to(:instagram_profile)
    belongs_to(:profile_tag)

    validates(:instagram_profile_id, uniqueness: { scope: :profile_tag_id })
  end

app/models/instagram_story.rb

0.0% lines covered

100.0% branches covered

17 relevant lines. 0 lines covered and 17 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Story captured for a profile, optionally linked to the profile event it
  # came from, with one attached media file and its detected faces.
  class InstagramStory < ApplicationRecord
    belongs_to(:instagram_account)
    belongs_to(:instagram_profile)
    belongs_to(:source_event, class_name: "InstagramProfileEvent", optional: true)

    has_many(:instagram_story_faces, dependent: :destroy)
    has_one_attached(:media)

    validates(:story_id, presence: true)
    validates(:processing_status, presence: true)

    # Stories flagged as processed.
    scope(:processed, lambda { where(processed: true) })
    # Newest first by taken_at, ties broken by id.
    scope(:recent_first, lambda { order(taken_at: :desc, id: :desc) })

    # @return [Boolean] true when media_type is "video" or the attachment's
    #   content type starts with "video/"
    def video?
      return true if media_type.to_s == "video"

      media&.content_type.to_s.start_with?("video/")
    end

    # @return [Boolean] true for everything that is not a video
    def image?
      !video?
    end
  end

app/models/instagram_story_face.rb

0.0% lines covered

100.0% branches covered

6 relevant lines. 0 lines covered and 6 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Face found in a story, optionally linked to a story person. Its role must
  # be one of ROLES.
  class InstagramStoryFace < ApplicationRecord
    # Roles a face may carry.
    ROLES = %w[primary_user secondary_person unknown].freeze

    belongs_to(:instagram_story)
    belongs_to(:instagram_story_person, optional: true)

    validates(:role, presence: true, inclusion: { in: ROLES })
  end

app/models/instagram_story_person.rb

0.0% lines covered

100.0% branches covered

63 relevant lines. 0 lines covered and 63 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Person tracked across a profile's stories and posts. Feedback, merge
  # information and the identity confidence live in the `metadata` hash.
  class InstagramStoryPerson < ApplicationRecord
    # Roles a person may carry.
    ROLES = %w[primary_user secondary_person unknown].freeze
    # Feedback statuses that take a person out of matching.
    INACTIVE_MATCHING_STATUSES = %w[incorrect irrelevant synthetic].freeze

    belongs_to(:instagram_account)
    belongs_to(:instagram_profile)

    has_many(:instagram_story_faces, dependent: :nullify)
    has_many(:instagram_post_faces, dependent: :nullify)

    validates(:role, presence: true, inclusion: { in: ROLES })

    # Most recently seen first, ties broken by id.
    scope(:recently_seen, lambda { order(last_seen_at: :desc, id: :desc) })

    # @return [String] the label, or "person_<id>" when the label is blank
    def display_label
      text = label.to_s
      text.present? ? text : "person_#{id}"
    end

    # @return [Hash] metadata, or {} when it is not a Hash
    def metadata_hash
      return metadata if metadata.is_a?(Hash)

      {}
    end

    # @return [Hash] the "user_feedback" section of metadata, or {}
    def feedback_metadata
      feedback = metadata_hash["user_feedback"]
      feedback.is_a?(Hash) ? feedback : {}
    end

    # @return [String] feedback status, "unverified" when none is stored
    def real_person_status
      feedback_metadata["real_person_status"].to_s.presence || "unverified"
    end

    # @return [Integer, nil] id of the person this one was merged into
    def merged_into_person_id
      target = metadata_hash["merged_into_person_id"]
      return nil unless target.present?

      target.to_i
    end

    # @return [Boolean] whether a merge target is recorded
    def merged?
      merged_into_person_id.present?
    end

    # @return [Boolean] false for merged people and for inactive statuses
    def active_for_matching?
      !merged? && !INACTIVE_MATCHING_STATUSES.include?(real_person_status)
    end

    # @return [Float] stored confidence clamped to 0.0..1.0; 0.0 when absent
    def identity_confidence
      stored = metadata_hash["identity_confidence"]
      stored.nil? ? 0.0 : stored.to_f.clamp(0.0, 1.0)
    end

    # Recomputes the confidence and writes it into metadata with
    # update_columns, together with updated_at.
    #
    # @param timestamp [Time] value written to updated_at
    # @return [Float] the new confidence
    def sync_identity_confidence!(timestamp: Time.current)
      updated = metadata_hash.deep_dup
      updated["identity_confidence"] = self.class.identity_confidence_score(
        appearance_count: appearance_count.to_i,
        role: role.to_s,
        metadata: updated
      )
      update_columns(metadata: updated, updated_at: timestamp)
      updated["identity_confidence"].to_f
    end

    # Scores identity confidence from appearances, role, feedback status and
    # linked usernames.
    #
    # @param appearance_count [Integer] 0.1 per appearance, capped at 1.0
    # @param role [String] "primary_user" adds 0.18
    # @param metadata [Hash] read for "user_feedback" and "linked_usernames"
    # @return [Float] score within 0.0..1.0, rounded to 3 decimals
    def self.identity_confidence_score(appearance_count:, role:, metadata:)
      details = metadata.is_a?(Hash) ? metadata : {}
      feedback = details["user_feedback"].is_a?(Hash) ? details["user_feedback"] : {}

      score = [ appearance_count.to_i / 10.0, 1.0 ].min
      score += 0.18 if role.to_s == "primary_user"

      case feedback["real_person_status"].to_s
      when "confirmed_real_person" then score += 0.22
      when "likely_real_person" then score += 0.10
      when *INACTIVE_MATCHING_STATUSES then score -= 0.45
      end

      link_count = Array(details["linked_usernames"]).reject(&:blank?).size
      score += [ link_count * 0.03, 0.15 ].min

      score.clamp(0.0, 1.0).round(3)
    end
  end

app/models/profile_tag.rb

0.0% lines covered

100.0% branches covered

8 relevant lines. 0 lines covered and 8 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Tag that can be attached to profiles through taggings. Names are stripped
  # and lower-cased before validation and must be unique regardless of case.
  class ProfileTag < ApplicationRecord
    has_many(:instagram_profile_taggings, dependent: :destroy)
    has_many(:instagram_profiles, through: :instagram_profile_taggings)

    validates(:name, presence: true, uniqueness: { case_sensitive: false })

    before_validation { self.name = name.to_s.strip.downcase }
  end

app/models/sync_run.rb

0.0% lines covered

100.0% branches covered

14 relevant lines. 0 lines covered and 14 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Sync run of an Instagram account. Statistics are kept as a JSON string in
  # `stats_json` and exposed through `stats` / `stats=`.
  class SyncRun < ApplicationRecord
    belongs_to(:instagram_account)

    validates(:kind, presence: true)
    validates(:status, presence: true)

    # @return [Object] the parsed stats_json; {} when it is blank or is not
    #   valid JSON
    def stats
      stats_json.blank? ? {} : JSON.parse(stats_json)
    rescue JSON::ParserError
      {}
    end

    # Stores the given value, converted with to_h, as JSON.
    #
    # @param value [#to_h]
    def stats=(value)
      serialized = value.to_h.to_json
      self.stats_json = serialized
    end
  end

app/services/ai/api_usage_tracker.rb

0.0% lines covered

100.0% branches covered

83 relevant lines. 0 lines covered and 83 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module Ai
    # Records AI API calls as AiApiCall rows. A thread-local context supplies
    # defaults (such as the account id) and extra metadata for every call
    # tracked inside a with_context block.
    class ApiUsageTracker
      THREAD_CONTEXT_KEY = :ai_api_usage_context

      class << self
        # Runs the block with `context` merged into the current thread's
        # context (nil values are dropped) and restores the previous context
        # afterwards, even when the block raises.
        #
        # @param context [#to_h]
        # @return [Object] the block's result
        def with_context(context = {})
          saved = current_context
          Thread.current[THREAD_CONTEXT_KEY] = saved.merge(context.to_h.compact)
          yield
        ensure
          Thread.current[THREAD_CONTEXT_KEY] = saved
        end

        # @return [Hash] the current thread's context, {} when none is set
        def current_context
          stored = Thread.current[THREAD_CONTEXT_KEY]
          stored.is_a?(Hash) ? stored : {}
        end

        # Persists a call with status "succeeded".
        def track_success(provider:, operation:, category:, started_at:, instagram_account_id: nil, http_status: nil, request_units: nil, input_tokens: nil, output_tokens: nil, total_tokens: nil, metadata: {})
          create_record(
            status: "succeeded",
            provider: provider,
            operation: operation,
            category: category,
            started_at: started_at,
            instagram_account_id: instagram_account_id,
            http_status: http_status,
            request_units: request_units,
            input_tokens: input_tokens,
            output_tokens: output_tokens,
            total_tokens: total_tokens,
            metadata: metadata
          )
        end

        # Persists a call with status "failed" and the error's message.
        def track_failure(provider:, operation:, category:, started_at:, error:, instagram_account_id: nil, http_status: nil, request_units: nil, metadata: {})
          create_record(
            status: "failed",
            provider: provider,
            operation: operation,
            category: category,
            started_at: started_at,
            instagram_account_id: instagram_account_id,
            http_status: http_status,
            request_units: request_units,
            metadata: metadata,
            error_message: error.to_s
          )
        end

        private

        # Builds and saves the AiApiCall row. Latency is the monotonic clock
        # reading minus `started_at`, in milliseconds. Failures to persist
        # are logged as warnings and not raised.
        def create_record(provider:, operation:, category:, status:, started_at:, instagram_account_id:, http_status:, request_units:, input_tokens: nil, output_tokens: nil, total_tokens: nil, metadata: {}, error_message: nil)
          recorded_at = Time.current
          elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at.to_f) * 1000.0).round
          ambient = current_context
          # An explicit account id wins over the one from the context.
          account_id = integer_or_nil(instagram_account_id) || integer_or_nil(ambient[:instagram_account_id])
          extra = (metadata.to_h.compact.presence || {}).merge(ambient.except(:instagram_account_id))

          AiApiCall.create!(
            instagram_account_id: account_id,
            provider: provider.to_s,
            operation: operation.to_s,
            category: normalize_category(category),
            status: status.to_s,
            http_status: integer_or_nil(http_status),
            latency_ms: elapsed_ms,
            request_units: integer_or_nil(request_units),
            input_tokens: integer_or_nil(input_tokens),
            output_tokens: integer_or_nil(output_tokens),
            total_tokens: integer_or_nil(total_tokens),
            error_message: error_message,
            occurred_at: recorded_at,
            metadata: extra
          )
        rescue StandardError => error
          Rails.logger.warn("[Ai::ApiUsageTracker] failed to persist usage event: #{error.class}: #{error.message}")
        end

        # @return [String] the stripped category when it is a known one,
        #   otherwise "other"
        def normalize_category(value)
          candidate = value.to_s.strip
          AiApiCall::CATEGORIES.include?(candidate) ? candidate : "other"
        end

        # @return [Integer, nil] nil for blank or unconvertible values
        def integer_or_nil(value)
          value.blank? ? nil : Integer(value)
        rescue StandardError
          nil
        end
      end
    end
  end

app/services/ai/comment_relevance_scorer.rb

0.0% lines covered

100.0% branches covered

55 relevant lines. 0 lines covered and 55 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module Ai
    # Scores comment suggestions by how well they match the post's topics and
    # image description and how much they differ from earlier comments.
    class CommentRelevanceScorer
      class << self
        # Scores every non-blank suggestion and sorts by descending score.
        #
        # @param suggestions [Array<#to_s>, nil] candidate comments
        # @param image_description [#to_s] description of the image
        # @param topics [Array<#to_s>, nil] topics of the post
        # @param historical_comments [Array<#to_s>, nil] earlier comments
        # @return [Array<Array(String, Float)>] [text, score] pairs
        def rank(suggestions:, image_description:, topics:, historical_comments: [])
          scored = Array(suggestions).filter_map do |suggestion|
            text = suggestion.to_s.strip
            next if text.blank?

            relevance = score(
              comment: text,
              image_description: image_description,
              topics: topics,
              historical_comments: historical_comments
            )
            [ text, relevance ]
          end

          scored.sort_by { |(_text, value)| -value }
        end

        # Weighted score: 40% topic overlap, 25% image overlap, 35% novelty
        # against earlier comments, plus 0.12 for a length of 20..110
        # characters or minus 0.2 above 140 characters.
        #
        # @return [Float] 0.0..1.0 rounded to 4 decimals; 0.0 when the comment
        #   has no usable tokens
        def score(comment:, image_description:, topics:, historical_comments: [])
          tokens = normalize_tokens(comment)
          return 0.0 if tokens.empty?

          topic_tokens = normalize_tokens(Array(topics).join(" "))
          image_tokens = normalize_tokens(image_description)
          history_tokens = Array(historical_comments).flat_map { |value| normalize_tokens(value) }

          topic_overlap = overlap_ratio(tokens, topic_tokens)
          image_overlap = overlap_ratio(tokens, image_tokens)
          novelty = 1.0 - overlap_ratio(tokens, history_tokens)

          length_bonus =
            if comment.length.between?(20, 110)
              0.12
            elsif comment.length > 140
              -0.2
            else
              0.0
            end

          raw = (topic_overlap * 0.4) + (image_overlap * 0.25) + (novelty * 0.35) + length_bonus
          raw.clamp(0.0, 1.0).round(4)
        end

        private

        # Share of `tokens` that also appear in `other_tokens` (0.0..1.0).
        def overlap_ratio(tokens, other_tokens)
          return 0.0 if tokens.empty? || other_tokens.empty?

          shared = (tokens & other_tokens).size
          (shared.to_f / tokens.size.to_f).clamp(0.0, 1.0)
        end

        # Lower-cases the text, replaces everything that is not a letter,
        # digit or whitespace with a space and returns the unique tokens of
        # three or more characters.
        #
        # [[:alnum:]] is Unicode-aware on UTF-8 strings, so text in other
        # scripts keeps its words; an ASCII-only a-z0-9 class would erase them
        # and make every such comment score 0.0. ASCII input tokenizes exactly
        # as it would with a-z0-9.
        def normalize_tokens(value)
          value.to_s
               .downcase
               .gsub(/[^[:alnum:]\s]/, " ")
               .split
               .reject { |token| token.length < 3 }
               .uniq
        end
      end
    end
  end

app/services/ai/insight_sync.rb

0.0% lines covered

100.0% branches covered

196 relevant lines. 0 lines covered and 196 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. module Ai
  2. class InsightSync
  3. class << self
  4. def sync_profile!(analysis_record:, payload:, analysis_hash:)
  5. profile = analysis_record.analyzable
  6. return unless profile.is_a?(InstagramProfile)
  7. languages = Array(analysis_hash["languages"]).filter_map do |l|
  8. next unless l.is_a?(Hash)
  9. l["language"].to_s.strip.presence
  10. end
  11. primary_language = languages.first
  12. secondary_languages = languages.drop(1)
  13. writing_style = analysis_hash["writing_style"].is_a?(Hash) ? analysis_hash["writing_style"] : {}
  14. likes = normalize_string_array(analysis_hash["likes"])
  15. dislikes = normalize_string_array(analysis_hash["dislikes"])
  16. insight = InstagramProfileInsight.create!(
  17. instagram_account: analysis_record.instagram_account,
  18. instagram_profile: profile,
  19. ai_analysis: analysis_record,
  20. summary: analysis_hash["summary"].to_s,
  21. primary_language: primary_language,
  22. secondary_languages: secondary_languages,
  23. tone: writing_style["tone"].to_s.presence,
  24. formality: writing_style["formality"].to_s.presence,
  25. emoji_usage: writing_style["emoji_usage"].to_s.presence,
  26. slang_level: writing_style["slang_level"].to_s.presence,
  27. engagement_style: infer_engagement_style(writing_style: writing_style),
  28. profile_type: infer_profile_type(profile: profile, payload: payload),
  29. messageability_score: infer_messageability_score(payload),
  30. last_refreshed_at: Time.current,
  31. raw_analysis: analysis_hash
  32. )
  33. InstagramProfileMessageStrategy.create!(
  34. instagram_account: analysis_record.instagram_account,
  35. instagram_profile: profile,
  36. ai_analysis: analysis_record,
  37. instagram_profile_insight: insight,
  38. opener_templates: normalize_string_array(analysis_hash["suggested_dm_openers"]),
  39. comment_templates: normalize_string_array(analysis_hash["suggested_comment_templates"]),
  40. dos: (likes + normalize_string_array(analysis_hash["personalization_tokens"])).uniq.first(10),
  41. donts: (dislikes + normalize_string_array(analysis_hash["no_go_zones"])).uniq.first(10),
  42. cta_style: infer_cta_style(analysis_hash),
  43. best_topics: likes.first(15),
  44. avoid_topics: dislikes.first(15)
  45. )
  46. create_profile_evidences!(
  47. insight: insight,
  48. analysis_record: analysis_record,
  49. analysis_hash: analysis_hash,
  50. likes: likes,
  51. dislikes: dislikes
  52. )
  53. end
  # Persists the post-level outcome of an AI analysis.
  #
  # Creates one InstagramPostInsight for the analyzed post, then one
  # InstagramPostEntity for every distinct topic / personalization token.
  # Does nothing when the analysis record is not attached to an InstagramPost.
  #
  # @param analysis_record [Object] record exposing #analyzable, #instagram_account and #id
  # @param analysis_hash [Hash] analysis payload; read with string keys
  # @return [Array<String>, nil] the entity values that were iterated, or nil when skipped
  def sync_post!(analysis_record:, analysis_hash:)
    post = analysis_record.analyzable
    return unless post.is_a?(InstagramPost)

    account = analysis_record.instagram_account
    topics = normalize_string_array(analysis_hash["topics"])
    actions = normalize_string_array(analysis_hash["suggested_actions"])
    comments = normalize_string_array(analysis_hash["comment_suggestions"])
    # The same confidence value is reused for the insight and every entity.
    confidence = to_float(analysis_hash["confidence"])

    insight_attributes = {
      instagram_account: account,
      instagram_post: post,
      ai_analysis: analysis_record,
      image_description: analysis_hash["image_description"].to_s.presence,
      relevant: to_bool(analysis_hash["relevant"]),
      author_type: analysis_hash["author_type"].to_s.presence,
      sentiment: analysis_hash["sentiment"].to_s.presence,
      topics: topics,
      suggested_actions: actions,
      comment_suggestions: comments,
      confidence: confidence,
      evidence: analysis_hash["evidence"].to_s,
      # Falls back to the overall confidence when no engagement score was parsed.
      engagement_score: to_float(analysis_hash["engagement_score"]) || confidence,
      recommended_next_action: analysis_hash["recommended_next_action"].to_s.presence || actions.first,
      raw_analysis: analysis_hash
    }
    post_insight = InstagramPostInsight.create!(insight_attributes)

    tokens = normalize_string_array(analysis_hash["personalization_tokens"])
    (topics + tokens).uniq.each do |entity_value|
      # A value present in both lists is recorded as a topic.
      entity_type = topics.include?(entity_value) ? "topic" : "personalization_token"
      InstagramPostEntity.create!(
        instagram_account: account,
        instagram_post: post,
        instagram_post_insight: post_insight,
        entity_type: entity_type,
        value: entity_value,
        confidence: confidence,
        evidence_text: analysis_hash["evidence"].to_s,
        source_type: "ai_analysis",
        source_ref: analysis_record.id.to_s
      )
    end
  end
  91. private
  # Records the individual signals behind a profile insight as
  # InstagramProfileSignalEvidence rows: one per detected language, one per
  # liked topic ("interest"), one per disliked topic ("avoidance") and, when
  # present, a single "confidence_note" row.
  #
  # @param insight [Object] profile insight; supplies #instagram_profile
  # @param analysis_record [Object] supplies #instagram_account and #id
  # @param analysis_hash [Hash] analysis payload; read with string keys
  # @param likes [Enumerable<String>] topics stored as "interest" signals
  # @param dislikes [Enumerable<String>] topics stored as "avoidance" signals
  # @return [Object, nil] the confidence-note record, or nil when there are no notes
  def create_profile_evidences!(insight:, analysis_record:, analysis_hash:, likes:, dislikes:)
    # Every row shares the same ownership/source columns; only these four vary.
    record_signal = lambda do |signal_type, value, confidence, evidence_text|
      InstagramProfileSignalEvidence.create!(
        instagram_account: analysis_record.instagram_account,
        instagram_profile: insight.instagram_profile,
        ai_analysis: analysis_record,
        instagram_profile_insight: insight,
        signal_type: signal_type,
        value: value,
        confidence: confidence,
        evidence_text: evidence_text,
        source_type: "ai_analysis",
        source_ref: analysis_record.id.to_s,
        occurred_at: Time.current
      )
    end

    Array(analysis_hash["languages"]).each do |entry|
      # Entries that are not hashes, or that name no language, are ignored.
      next unless entry.is_a?(Hash)

      language = entry["language"].to_s.strip
      next if language.blank?

      record_signal.call("language", language, to_float(entry["confidence"]), entry["evidence"].to_s)
    end

    likes.each { |topic| record_signal.call("interest", topic, nil, "likes") }
    dislikes.each { |topic| record_signal.call("avoidance", topic, nil, "dislikes") }

    notes = analysis_hash["confidence_notes"].to_s.strip
    record_signal.call("confidence_note", nil, nil, notes) if notes.present?
  end
  # Classifies a profile as "page", "personal", "business" or "unknown".
  #
  # Tags take precedence over the bio: a "page" tag wins, then a
  # "personal_user"/"friend" tag, then a business keyword in the bio.
  #
  # @param profile [Object] must respond to #profile_tags, whose result responds to #pluck(:name)
  # @param payload [Hash] read for the :bio key
  # @return [String]
  def infer_profile_type(profile:, payload:)
    tag_names = profile.profile_tags.pluck(:name)

    if tag_names.include?("page")
      "page"
    elsif tag_names.include?("personal_user") || tag_names.include?("friend")
      "personal"
    elsif payload[:bio].to_s.downcase.match?(/\b(bookings|business|official|shop|store)\b/)
      "business"
    else
      "unknown"
    end
  end
  # Maps the payload's :can_message flag to a score: 0.8 for true, 0.2 for
  # false, and 0.5 for anything else (including a missing key).
  #
  # @param payload [Hash] read for the :can_message key
  # @return [Float]
  def infer_messageability_score(payload)
    case payload[:can_message]
    when true then 0.8
    when false then 0.2
    else 0.5
    end
  end
  # Builds a "tone/formality/emoji" label from the writing-style hash,
  # leaving out blank parts; returns "unknown" when every part is blank.
  #
  # @param writing_style [Hash] read for the "tone", "formality" and "emoji_usage" string keys
  # @return [String]
  def infer_engagement_style(writing_style:)
    parts = %w[tone formality emoji_usage].map { |key| writing_style[key].to_s }
    label = parts.reject(&:blank?).join("/")
    label.presence || "unknown"
  end
  # Labels the call-to-action style from the first suggested DM opener:
  # "question_based" when it contains a question mark, otherwise "soft".
  #
  # @param analysis_hash [Hash] read for the "suggested_dm_openers" string key
  # @return [String]
  def infer_cta_style(analysis_hash)
    opener = normalize_string_array(analysis_hash["suggested_dm_openers"]).first.to_s
    opener.include?("?") ? "question_based" : "soft"
  end
  # Coerces the input to an array of stripped strings, dropping blank entries.
  #
  # @param value [Object] nil, a scalar or a collection (wrapped with Kernel#Array)
  # @return [Array<String>]
  def normalize_string_array(value)
    Array(value).each_with_object([]) do |item, cleaned|
      text = item.to_s.strip
      cleaned << text if text.present?
    end
  end
  # Strict numeric conversion: returns the Float for values Kernel#Float
  # accepts, and nil for anything it rejects (nil, blank or non-numeric text).
  #
  # @param value [Object]
  # @return [Float, nil]
  def to_float(value)
    converted = begin
      Float(value)
    rescue StandardError
      nil
    end
    converted
  end
  # Casts a loosely-typed value using ActiveModel's Boolean type.
  #
  # @param value [Object]
  # @return [Boolean, nil] whatever ActiveModel::Type::Boolean#cast returns
  def to_bool(value)
    caster = ActiveModel::Type::Boolean.new
    caster.cast(value)
  end
  194. end
  195. end
  196. end

app/services/ai/local_engagement_comment_generator.rb

0.0% lines covered

100.0% branches covered

462 relevant lines. 0 lines covered and 462 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "json"
  2. require "net/http"
  3. module Ai
  4. class LocalEngagementCommentGenerator
  # Model name used when the caller does not pass one.
  DEFAULT_MODEL = "mistral:7b".freeze

  # Fewer than MIN_SUGGESTIONS usable comments triggers a retry and then the
  # canned fallback; results are capped at MAX_SUGGESTIONS.
  MIN_SUGGESTIONS = 3
  MAX_SUGGESTIONS = 8

  # Lower-case substrings that disqualify a comment. Empty, so filtering is a no-op.
  BLOCKED_TERMS = [].freeze

  # Network failures that generate! re-raises instead of converting to a fallback result.
  TRANSIENT_ERRORS = [Net::OpenTimeout, Net::ReadTimeout, Errno::ECONNRESET, Errno::ECONNREFUSED].freeze
  # @param ollama_client [Object] client used via #generate(model:, prompt:, temperature:, max_tokens:)
  # @param model [String, nil] model name; a blank value selects DEFAULT_MODEL
  def initialize(ollama_client:, model: nil)
    requested_model = model.to_s.presence
    @ollama_client = ollama_client
    @model = requested_model || DEFAULT_MODEL
  end
  # Generates comment suggestions for a story/post with the configured model.
  #
  # Flow:
  # 1. When the generation policy carries an allow_comment flag that casts to a
  #    non-true value, return a "blocked_by_policy" result without calling the model.
  #    The flag is honoured under either the symbol or the string key.
  # 2. Build the prompt and ask the model; parse and filter the suggestions.
  # 3. With fewer than MIN_SUGGESTIONS, retry once with a stricter instruction.
  # 4. If still insufficient, return canned fallback comments.
  #
  # Errors listed in TRANSIENT_ERRORS are re-raised; any other StandardError is
  # converted into an "error_fallback" result. Unknown keyword arguments are ignored.
  #
  # @return [Hash] with keys :model, :prompt, :raw, :source, :status,
  #   :fallback_used, :error_message and :comment_suggestions. :raw is always the
  #   first model response, even when the retry supplied the suggestions.
  def generate!(post_payload:, image_description:, topics:, author_type:, historical_comments: [], historical_context: nil, historical_story_context: [], local_story_intelligence: {}, historical_comparison: {}, cv_ocr_evidence: {}, verified_story_facts: {}, story_ownership_classification: {}, generation_policy: {}, profile_preparation: {}, verified_profile_history: [], conversational_voice: {}, **_extra)
    if generation_policy.is_a?(Hash)
      # Accept the flag under either key style; a string-keyed policy must block too.
      flag_given = generation_policy.key?(:allow_comment) || generation_policy.key?("allow_comment")
      flag_value = generation_policy[:allow_comment] || generation_policy["allow_comment"]
      if flag_given && !ActiveModel::Type::Boolean.new.cast(flag_value)
        return {
          model: @model,
          prompt: nil,
          raw: {},
          source: "policy",
          status: "blocked_by_policy",
          fallback_used: false,
          error_message: generation_policy[:reason].to_s.presence || generation_policy["reason"].to_s.presence || "Generation blocked by verified story policy.",
          comment_suggestions: []
        }
      end
    end

    prompt = build_prompt(
      post_payload: post_payload,
      image_description: image_description,
      topics: topics,
      author_type: author_type,
      historical_comments: historical_comments,
      historical_context: historical_context,
      historical_story_context: historical_story_context,
      local_story_intelligence: local_story_intelligence,
      historical_comparison: historical_comparison,
      cv_ocr_evidence: cv_ocr_evidence,
      verified_story_facts: verified_story_facts,
      story_ownership_classification: story_ownership_classification,
      generation_policy: generation_policy,
      profile_preparation: profile_preparation,
      verified_profile_history: verified_profile_history,
      conversational_voice: conversational_voice
    )

    resp = @ollama_client.generate(
      model: @model,
      prompt: prompt,
      temperature: 0.7,
      max_tokens: 300
    )
    suggestions = filter_safe_comments(parse_comment_suggestions(resp))

    if suggestions.size < MIN_SUGGESTIONS
      # Single retry at a lower temperature with an explicit format reminder.
      retry_resp = @ollama_client.generate(
        model: @model,
        prompt: "#{prompt}\n\nReturn strict JSON only. Ensure 8 non-empty suggestions.",
        temperature: 0.4,
        max_tokens: 220
      )
      retry_suggestions = filter_safe_comments(parse_comment_suggestions(retry_resp))
      suggestions = retry_suggestions if retry_suggestions.size >= MIN_SUGGESTIONS
    end

    if suggestions.size < MIN_SUGGESTIONS
      fallback = fallback_comments(image_description: image_description, topics: topics).first(MAX_SUGGESTIONS)
      return {
        model: @model,
        prompt: prompt,
        raw: resp,
        source: "fallback",
        status: "fallback_used",
        fallback_used: true,
        error_message: "Generated suggestions were insufficient (#{suggestions.size}/#{MIN_SUGGESTIONS})",
        comment_suggestions: fallback
      }
    end

    {
      model: @model,
      prompt: prompt,
      raw: resp,
      source: "ollama",
      status: "ok",
      fallback_used: false,
      error_message: nil,
      comment_suggestions: suggestions.first(MAX_SUGGESTIONS)
    }
  rescue *TRANSIENT_ERRORS
    # Network-level failures propagate to the caller unchanged.
    raise
  rescue StandardError => e
    # `prompt` is nil here when the failure happened while building it.
    {
      model: @model,
      prompt: prompt,
      raw: {},
      source: "fallback",
      status: "error_fallback",
      fallback_used: true,
      error_message: e.message.to_s,
      comment_suggestions: fallback_comments(image_description: image_description, topics: topics).first(MAX_SUGGESTIONS)
    }
  end
  105. private
  # Assembles the model prompt: fixed grounding/writing instructions followed
  # by a CONTEXT_JSON document built from compacted copies of every input.
  #
  # `post_payload` is read for its :author_profile key. All other inputs go
  # through their respective compact_* helpers or truncate_text before being
  # embedded, which bounds the prompt size.
  #
  # @return [String] the complete prompt text
  def build_prompt(post_payload:, image_description:, topics:, author_type:, historical_comments:, historical_context:, historical_story_context:, local_story_intelligence:, historical_comparison:, cv_ocr_evidence:, verified_story_facts:, story_ownership_classification:, generation_policy:, profile_preparation: {}, verified_profile_history: [], conversational_voice: {})
    facts = compact_verified_story_facts(
      verified_story_facts,
      local_story_intelligence: local_story_intelligence,
      cv_ocr_evidence: cv_ocr_evidence
    )
    ownership = compact_story_ownership_classification(story_ownership_classification)
    policy = compact_generation_policy(generation_policy)
    comparison = compact_historical_comparison(historical_comparison)
    story_patterns = compact_historical_story_context(historical_story_context)
    profile_summary = compact_author_profile(post_payload[:author_profile], author_type: author_type)
    preparation = compact_profile_preparation(profile_preparation)
    profile_history = compact_verified_profile_history(verified_profile_history)
    voice = compact_conversational_voice(conversational_voice)

    current_story = {
      image_description: truncate_text(image_description.to_s, max: 280),
      topics: Array(topics).map(&:to_s).reject(&:blank?).uniq.first(10),
      verified_story_facts: facts,
      ownership: ownership,
      generation_policy: policy
    }
    history = {
      comparison: comparison,
      recent_story_patterns: story_patterns,
      recent_profile_history: profile_history,
      recent_comments: Array(historical_comments).map { |comment| truncate_text(comment.to_s, max: 110) }.reject(&:blank?).first(6),
      summary: truncate_text(historical_context.to_s, max: 280)
    }
    context_json = {
      task: "instagram_story_comment_generation",
      output_contract: { format: "strict_json", count: 8, max_chars_per_comment: 140 },
      profile: profile_summary,
      profile_preparation: preparation,
      conversational_voice: voice,
      current_story: current_story,
      historical_context: history
    }

    <<~PROMPT
      You are a production-grade Instagram engagement assistant.
      Generate concise comments from VERIFIED data only.
      Grounding rules:
      - treat CONTEXT_JSON as the only source of truth
      - never use URLs, IDs, or hidden metadata as evidence
      - do not infer facts not present in `verified_story_facts`
      - require `profile_preparation.ready_for_comment_generation` to be true for personalized comments
      - if `generation_policy.allow_comment` is false, return empty suggestions
      - if ownership is not `owned_by_profile`, keep output neutral and non-personal
      - if identity_verification.owner_likelihood is low, avoid user-specific assumptions
      - never fabricate OCR text, usernames, objects, scenes, or participants
      Writing rules:
      - natural, public-safe, short comments
      - max 140 chars each
      - vary openings and avoid duplicates
      - avoid explicit/adult language
      - avoid identity, age, gender, or sensitive-trait claims
      - reflect recurring themes and wording style from `historical_context` and `conversational_voice`
      Output STRICT JSON only:
      {
      "comment_suggestions": ["...", "...", "...", "...", "...", "...", "...", "..."]
      }
      Generate exactly 8 suggestions, each <= 140 characters.
      Keep at least 3 suggestions neutral-safe for public comments.
      Avoid repeating phrases from previous comments for the same profile.
      CONTEXT_JSON:
      #{JSON.pretty_generate(context_json)}
    PROMPT
  end
  # Drops comments containing any BLOCKED_TERMS entry (case-insensitive
  # substring match). Returns the input unchanged when no terms are configured.
  #
  # @param comments [Array<String>, nil]
  # @return [Array<String>]
  def filter_safe_comments(comments)
    candidates = Array(comments)
    return candidates if BLOCKED_TERMS.empty?

    candidates.select do |comment|
      lowered = comment.to_s.downcase
      BLOCKED_TERMS.none? { |term| lowered.include?(term) }
    end
  end
  # Collapses runs of whitespace, trims, and limits a suggestion to 140
  # characters.
  #
  # The limit is applied per character rather than per byte, so multibyte
  # characters such as emoji are never cut in half into an invalid string.
  #
  # @param value [Object] raw suggestion
  # @return [String, nil] cleaned comment, or nil when nothing remains
  def normalize_comment(value)
    text = value.to_s.gsub(/\s+/, " ").strip
    return nil if text.blank?

    text[0, 140]
  end
  # Extracts normalized, de-duplicated suggestions from a model response.
  #
  # The response's "response" entry is expected to hold a JSON object with a
  # "comment_suggestions" array. Parsing is best-effort: a missing payload,
  # malformed JSON, or JSON whose top level is not an object all yield [],
  # so the caller can retry or fall back.
  #
  # @param response_payload [Hash, nil] raw client response
  # @return [Array<String>]
  def parse_comment_suggestions(response_payload)
    parsed = begin
      JSON.parse(response_payload["response"])
    rescue StandardError
      nil
    end
    # A top-level JSON array or scalar has no "comment_suggestions" key to read.
    return [] unless parsed.is_a?(Hash)

    Array(parsed["comment_suggestions"]).map { |entry| normalize_comment(entry) }.compact.uniq
  end
  # Canned comments used when the model output is unusable.
  #
  # One comment mentions an "anchor": the first non-blank topic, otherwise the
  # lower-cased text of the image description up to its first comma or period,
  # otherwise "this post".
  #
  # @param image_description [String, nil]
  # @param topics [Array, nil]
  # @return [Array<String>] eight comments
  def fallback_comments(image_description:, topics:)
    topic_anchor = Array(topics).map(&:to_s).find(&:present?)
    anchor = topic_anchor || image_description.to_s.split(/[,.]/).first.to_s.downcase
    anchor = "this post" if anchor.blank?

    [
      "Okay this is a whole vibe 🔥",
      "Not gonna lie, this #{anchor} moment is clean 👏",
      "Love the energy on this one ✨",
      "This is low-key so good, great post 🙌",
      "Major main-feed energy right here 😮‍💨",
      "Ate this one, no notes 💯",
      "This made me stop scrolling fr 👀",
      "Super solid post, keep these coming 🚀"
    ]
  end
  # Strips the value and, when it is longer than `max` characters, cuts it to
  # `max` characters and appends "...".
  #
  # Both the length check and the cut are character-based. Cutting by bytes
  # could split a multibyte character and yield an invalid string, which JSON
  # generation of the prompt context would then reject.
  #
  # @param value [Object] converted with #to_s (nil becomes "")
  # @param max [Integer] maximum number of characters kept before the ellipsis
  # @return [String]
  def truncate_text(value, max:)
    text = value.to_s.strip
    return text if text.length <= max

    "#{text[0, max]}..."
  end
  # Reduces a local story-intelligence payload to a bounded summary.
  #
  # Each field is read from the symbol key first, then the string key. Lists
  # are capped, long text is truncated to 600 characters, and entries whose
  # value is nil are removed from the result. A non-Hash payload is treated
  # as empty.
  #
  # @param payload [Hash, Object]
  # @return [Hash]
  def compact_local_story_intelligence(payload)
    data = payload.is_a?(Hash) ? payload : {}
    pick = ->(key) { data[key] || data[key.to_s] }
    strings = ->(key, limit) { Array(pick.call(key)).map(&:to_s).reject(&:blank?).first(limit) }

    summary = {
      source: pick.call(:source),
      reason: pick.call(:reason),
      ocr_text: truncate_text(pick.call(:ocr_text), max: 600),
      transcript: truncate_text(pick.call(:transcript), max: 600),
      objects: strings.call(:objects, 20),
      scenes: Array(pick.call(:scenes)).first(20),
      hashtags: strings.call(:hashtags, 20),
      mentions: strings.call(:mentions, 20),
      profile_handles: strings.call(:profile_handles, 20),
      source_account_reference: pick.call(:source_account_reference).to_s.presence,
      source_profile_ids: strings.call(:source_profile_ids, 10),
      media_type: pick.call(:media_type).to_s.presence,
      face_count: pick.call(:face_count).to_i,
      people: Array(pick.call(:people)).first(10),
      object_detections: Array(pick.call(:object_detections)).first(25),
      ocr_blocks: Array(pick.call(:ocr_blocks)).first(25)
    }
    summary.compact
  end
  # Reduces a computer-vision / OCR evidence payload to a bounded summary.
  #
  # Each field is read from the symbol key first, then the string key. Lists
  # are capped, OCR text and transcript are truncated to 600 characters, and
  # entries whose value is nil are removed. A non-Hash payload is treated as
  # empty.
  #
  # @param payload [Hash, Object]
  # @return [Hash]
  def compact_cv_ocr_evidence(payload)
    data = payload.is_a?(Hash) ? payload : {}
    read = ->(key) { data[key] || data[key.to_s] }
    string_list = ->(key, limit) { Array(read.call(key)).map(&:to_s).reject(&:blank?).first(limit) }

    evidence = {
      source: read.call(:source),
      reason: read.call(:reason),
      objects: string_list.call(:objects, 20),
      scenes: Array(read.call(:scenes)).first(20),
      hashtags: string_list.call(:hashtags, 20),
      mentions: string_list.call(:mentions, 20),
      profile_handles: string_list.call(:profile_handles, 20),
      source_account_reference: read.call(:source_account_reference).to_s.presence,
      source_profile_ids: string_list.call(:source_profile_ids, 10),
      media_type: read.call(:media_type).to_s.presence,
      face_count: read.call(:face_count).to_i,
      people: Array(read.call(:people)).first(10),
      object_detections: Array(read.call(:object_detections)).first(25),
      ocr_blocks: Array(read.call(:ocr_blocks)).first(25),
      ocr_text: truncate_text(read.call(:ocr_text), max: 600),
      transcript: truncate_text(read.call(:transcript), max: 600)
    }
    evidence.compact
  end
  # Caps every list in a historical-comparison payload at 12 entries and casts
  # the overlap flag to a boolean. Fields are read from the symbol key first,
  # then the string key; a non-Hash payload is treated as empty.
  #
  # @param payload [Hash, Object]
  # @return [Hash]
  def compact_historical_comparison(payload)
    data = payload.is_a?(Hash) ? payload : {}
    lookup = ->(key) { data[key] || data[key.to_s] }
    list_keys = %i[
      shared_topics novel_topics shared_objects novel_objects shared_scenes
      novel_scenes recurring_hashtags recurring_mentions recurring_people_ids
    ]

    comparison = list_keys.to_h { |key| [key, Array(lookup.call(key)).first(12)] }
    comparison[:has_historical_overlap] = ActiveModel::Type::Boolean.new.cast(lookup.call(:has_historical_overlap))
    comparison
  end
  # Produces the bounded "verified_story_facts" section of the prompt context.
  #
  # When the payload is blank (or not a Hash), the compacted CV/OCR evidence
  # is used instead, with signal_score defaulting to 0.
  # `local_story_intelligence` is accepted but not read by this method.
  #
  # @param payload [Hash, Object] verified facts
  # @param local_story_intelligence [Object] unused here
  # @param cv_ocr_evidence [Hash, Object] substitute source when payload is blank
  # @return [Hash]
  def compact_verified_story_facts(payload, local_story_intelligence:, cv_ocr_evidence:)
    facts = payload.is_a?(Hash) ? payload : {}
    # An existing :signal_score in the substitute wins over the default of 0.
    facts = { signal_score: 0 }.merge(compact_cv_ocr_evidence(cv_ocr_evidence)) if facts.blank?

    read = ->(key) { facts[key] || facts[key.to_s] }
    string_list = ->(key, limit) { Array(read.call(key)).map(&:to_s).reject(&:blank?).first(limit) }

    {
      source: read.call(:source),
      reason: read.call(:reason),
      signal_score: read.call(:signal_score).to_i,
      ocr_text: truncate_text(read.call(:ocr_text), max: 320),
      transcript: truncate_text(read.call(:transcript), max: 320),
      objects: string_list.call(:objects, 15),
      object_detections: compact_object_detections(read.call(:object_detections)),
      scenes: compact_scenes(read.call(:scenes)),
      hashtags: string_list.call(:hashtags, 15),
      mentions: string_list.call(:mentions, 15),
      profile_handles: string_list.call(:profile_handles, 15),
      detected_usernames: string_list.call(:detected_usernames, 15),
      source_profile_references: string_list.call(:source_profile_references, 15),
      share_status: read.call(:share_status).to_s.presence,
      meme_markers: string_list.call(:meme_markers, 10),
      face_count: read.call(:face_count).to_i,
      faces: compact_faces_payload(read.call(:faces)),
      identity_verification: compact_identity_verification(read.call(:identity_verification))
    }
  end
  # Bounded copy of a story-ownership classification. Fields are read from the
  # symbol key first, then the string key. The summary is truncated to 220
  # characters and lists are capped at 10. A non-Hash payload is treated as empty.
  #
  # @param payload [Hash, Object]
  # @return [Hash]
  def compact_story_ownership_classification(payload)
    data = payload.is_a?(Hash) ? payload : {}
    fetch_value = ->(key) { data[key] || data[key.to_s] }
    top_ten = ->(key) { Array(fetch_value.call(key)).first(10) }

    {
      label: fetch_value.call(:label),
      decision: fetch_value.call(:decision),
      confidence: fetch_value.call(:confidence).to_f,
      summary: truncate_text(fetch_value.call(:summary), max: 220),
      reason_codes: top_ten.call(:reason_codes),
      detected_external_usernames: top_ten.call(:detected_external_usernames),
      source_profile_references: top_ten.call(:source_profile_references),
      share_status: fetch_value.call(:share_status)
    }
  end
  # Bounded copy of the generation policy for the prompt context.
  #
  # allow_comment is taken from the symbol key whenever that key exists (even
  # with a false value), otherwise from the string key. Other fields are read
  # from the symbol key first, then the string key.
  #
  # @param payload [Hash, Object] a non-Hash payload is treated as empty
  # @return [Hash]
  def compact_generation_policy(payload)
    data = payload.is_a?(Hash) ? payload : {}
    pick = ->(key) { data[key] || data[key.to_s] }
    boolean = ActiveModel::Type::Boolean.new
    allow_flag = data.key?(:allow_comment) ? data[:allow_comment] : data["allow_comment"]

    {
      allow_comment: boolean.cast(allow_flag),
      reason_code: pick.call(:reason_code),
      reason: truncate_text(pick.call(:reason), max: 220),
      classification: pick.call(:classification),
      signal_score: pick.call(:signal_score).to_i,
      historical_overlap: boolean.cast(pick.call(:historical_overlap))
    }
  end
  # Bounded copy of the profile-preparation summary.
  #
  # The nested "identity_consistency" and "analysis" sections are taken from
  # the symbol key when it holds a Hash, else from the string key when that
  # holds a Hash, else treated as empty.
  #
  # @param payload [Hash, Object] a non-Hash payload is treated as empty
  # @return [Hash]
  def compact_profile_preparation(payload)
    data = payload.is_a?(Hash) ? payload : {}
    section = ->(key) { [data[key], data[key.to_s]].find { |candidate| candidate.is_a?(Hash) } || {} }
    identity = section.call(:identity_consistency)
    analysis = section.call(:analysis)
    boolean = ActiveModel::Type::Boolean.new

    identity_summary = {
      consistent: boolean.cast(identity[:consistent] || identity["consistent"]),
      reason_code: identity[:reason_code] || identity["reason_code"],
      dominance_ratio: (identity[:dominance_ratio] || identity["dominance_ratio"]).to_f,
      appearance_count: (identity[:appearance_count] || identity["appearance_count"]).to_i,
      total_faces: (identity[:total_faces] || identity["total_faces"]).to_i
    }

    {
      ready_for_comment_generation: boolean.cast(data[:ready_for_comment_generation] || data["ready_for_comment_generation"]),
      reason_code: data[:reason_code] || data["reason_code"],
      reason: truncate_text(data[:reason] || data["reason"], max: 220),
      prepared_at: data[:prepared_at] || data["prepared_at"],
      analyzed_posts_count: (analysis[:analyzed_posts_count] || analysis["analyzed_posts_count"]).to_i,
      posts_with_structured_signals_count: (analysis[:posts_with_structured_signals_count] || analysis["posts_with_structured_signals_count"]).to_i,
      latest_posts_analyzed: boolean.cast(analysis[:latest_posts_analyzed] || analysis["latest_posts_analyzed"]),
      identity_consistency: identity_summary
    }
  end
  # Summarises up to 10 profile-history rows. Per row, lists are capped at 6,
  # counts are coerced to integers, and the image description is truncated to
  # 180 characters. Rows that are not Hashes produce an all-default entry.
  #
  # @param rows [Array<Hash>, nil]
  # @return [Array<Hash>]
  def compact_verified_profile_history(rows)
    Array(rows).first(10).map do |row|
      data = row.is_a?(Hash) ? row : {}
      pick = ->(key) { data[key] || data[key.to_s] }
      top_six = ->(key) { Array(pick.call(key)).first(6) }

      {
        shortcode: pick.call(:shortcode),
        taken_at: pick.call(:taken_at),
        topics: top_six.call(:topics),
        objects: top_six.call(:objects),
        hashtags: top_six.call(:hashtags),
        mentions: top_six.call(:mentions),
        face_count: pick.call(:face_count).to_i,
        primary_face_count: pick.call(:primary_face_count).to_i,
        secondary_face_count: pick.call(:secondary_face_count).to_i,
        image_description: truncate_text(pick.call(:image_description), max: 180)
      }
    end
  end
  # Bounded copy of the conversational-voice profile. Lists are capped, prior
  # comment examples are truncated to 100 characters each (at most 6 kept), and
  # entries whose value is nil are removed.
  #
  # @param payload [Hash, Object] a non-Hash payload is treated as empty
  # @return [Hash]
  def compact_conversational_voice(payload)
    data = payload.is_a?(Hash) ? payload : {}
    pick = ->(key) { data[key] || data[key.to_s] }
    capped = ->(key, limit) { Array(pick.call(key)).first(limit) }
    examples = Array(pick.call(:prior_comment_examples)).map { |example| truncate_text(example, max: 100) }

    voice = {
      author_type: pick.call(:author_type),
      profile_tags: capped.call(:profile_tags, 10),
      bio_keywords: capped.call(:bio_keywords, 10),
      recurring_topics: capped.call(:recurring_topics, 12),
      recurring_hashtags: capped.call(:recurring_hashtags, 10),
      frequent_people_labels: capped.call(:frequent_people_labels, 8),
      prior_comment_examples: examples.first(6)
    }
    voice.compact
  end
  # Summarises recent story history for the prompt.
  #
  # Considers the first 12 rows, skips those whose parsed occurred_at is older
  # than 45 days (rows without a parseable time are kept), and returns at most
  # 6 summaries.
  #
  # @param rows [Array<Hash>, nil]
  # @return [Array<Hash>]
  def compact_historical_story_context(rows)
    oldest_allowed = 45.days.ago

    summaries = Array(rows).first(12).map do |row|
      data = row.is_a?(Hash) ? row : {}
      pick = ->(key) { data[key] || data[key.to_s] }
      top_six = ->(key) { Array(pick.call(key)).first(6) }

      seen_at = parse_time(pick.call(:occurred_at))
      next if seen_at && seen_at < oldest_allowed

      # Only hash-shaped people entries can carry a person id.
      person_ids = Array(pick.call(:people)).grep(Hash).map { |person| person[:person_id] || person["person_id"] }

      {
        occurred_at: seen_at&.iso8601,
        topics: top_six.call(:topics),
        objects: top_six.call(:objects),
        hashtags: top_six.call(:hashtags),
        mentions: top_six.call(:mentions),
        profile_handles: top_six.call(:profile_handles),
        recurring_people_ids: person_ids.compact.first(4),
        face_count: pick.call(:face_count).to_i
      }
    end

    summaries.compact.first(6)
  end
  # Minimal author profile for the prompt: username, display name (truncated
  # to 80 characters), author type (default "unknown") and up to 10 non-blank
  # bio keywords.
  #
  # @param payload [Hash, Object] a non-Hash payload is treated as empty
  # @param author_type [Object] converted with #to_s
  # @return [Hash]
  def compact_author_profile(payload, author_type:)
    data = payload.is_a?(Hash) ? payload : {}
    keywords = Array(data[:bio_keywords] || data["bio_keywords"]).map(&:to_s).reject(&:blank?)

    {
      username: data[:username] || data["username"],
      display_name: truncate_text(data[:display_name] || data["display_name"], max: 80),
      author_type: author_type.to_s.presence || "unknown",
      bio_keywords: keywords.first(10)
    }
  end
  # Bounded copy of the identity-verification section. Fields are read from
  # the symbol key first, then the string key; flags are cast to booleans and
  # lists are capped.
  #
  # @param payload [Hash, Object] a non-Hash payload is treated as empty
  # @return [Hash]
  def compact_identity_verification(payload)
    data = payload.is_a?(Hash) ? payload : {}
    pick = ->(key) { data[key] || data[key.to_s] }
    boolean = ActiveModel::Type::Boolean.new

    {
      owner_likelihood: pick.call(:owner_likelihood),
      confidence: pick.call(:confidence).to_f,
      primary_person_present: boolean.cast(pick.call(:primary_person_present)),
      recurring_primary_person: boolean.cast(pick.call(:recurring_primary_person)),
      bio_topic_overlap: Array(pick.call(:bio_topic_overlap)).first(8),
      age_consistency: pick.call(:age_consistency),
      gender_consistency: pick.call(:gender_consistency),
      reason_codes: Array(pick.call(:reason_codes)).first(10)
    }
  end
  # Bounded copy of the faces section: the four counters as given, plus up to
  # 8 people entries reduced to a fixed set of fields (nil fields dropped).
  #
  # @param payload [Hash, Object] a non-Hash payload is treated as empty
  # @return [Hash]
  def compact_faces_payload(payload)
    data = payload.is_a?(Hash) ? payload : {}

    people = Array(data[:people] || data["people"]).map { |row|
      person = row.is_a?(Hash) ? row : {}
      {
        person_id: person[:person_id] || person["person_id"],
        role: person[:role] || person["role"],
        label: person[:label] || person["label"],
        similarity: (person[:similarity] || person["similarity"]).to_f,
        age_range: person[:age_range] || person["age_range"],
        gender: person[:gender] || person["gender"]
      }.compact
    }

    {
      total_count: data[:total_count] || data["total_count"],
      primary_user_count: data[:primary_user_count] || data["primary_user_count"],
      secondary_person_count: data[:secondary_person_count] || data["secondary_person_count"],
      unknown_count: data[:unknown_count] || data["unknown_count"],
      people: people.first(8)
    }
  end
  # Normalises object detections to `{label:, confidence:}` pairs.
  #
  # Labels are stripped and lower-cased; rows without a label are dropped.
  # Confidence comes from the first present of confidence/score (symbol or
  # string key), rounded to 3 decimals. Duplicates are removed and at most 8
  # entries are returned.
  #
  # @param rows [Array<Hash>, nil]
  # @return [Array<Hash>]
  def compact_object_detections(rows)
    detections = Array(rows).map do |row|
      data = row.is_a?(Hash) ? row : {}
      label = (data[:label] || data["label"]).to_s.strip
      next if label.blank?

      score = data[:confidence] || data["confidence"] || data[:score] || data["score"]
      { label: label.downcase, confidence: score.to_f.round(3) }
    end

    detections.compact.uniq.first(8)
  end
  # Normalises scene rows to `{type:, timestamp:}` pairs.
  #
  # Types are stripped and lower-cased; rows without a type are dropped.
  # Timestamps are coerced to floats rounded to 2 decimals. Duplicates are
  # removed and at most 8 entries are returned.
  #
  # @param rows [Array<Hash>, nil]
  # @return [Array<Hash>]
  def compact_scenes(rows)
    scenes = Array(rows).map do |row|
      data = row.is_a?(Hash) ? row : {}
      scene_type = (data[:type] || data["type"]).to_s.strip
      next if scene_type.blank?

      moment = data[:timestamp] || data["timestamp"]
      { type: scene_type.downcase, timestamp: moment.to_f.round(2) }
    end

    scenes.compact.uniq.first(8)
  end
  # Parses a time value with Time.zone.parse; returns nil for blank input or
  # when parsing raises.
  #
  # @param value [Object] converted with #to_s before parsing
  # @return [Object, nil] the result of Time.zone.parse, or nil
  def parse_time(value)
    raw = value.to_s
    return nil if raw.blank?

    Time.zone.parse(raw)
  rescue StandardError
    nil
  end
  462. end
  463. end

app/services/ai/local_microservice_client.rb

0.0% lines covered

100.0% branches covered

597 relevant lines. 0 lines covered and 597 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "net/http"
  2. require "json"
  3. require "base64"
  4. require "tempfile"
  5. require "securerandom"
  6. module Ai
  7. class LocalMicroserviceClient
  # Service endpoint; overridable through LOCAL_AI_SERVICE_URL.
  BASE_URL = ENV.fetch("LOCAL_AI_SERVICE_URL") { "http://localhost:8000" }.freeze

  # HTTP timeouts in seconds, clamped so a bad environment value cannot
  # disable them or make them unbounded.
  HTTP_OPEN_TIMEOUT_SECONDS = ENV.fetch("LOCAL_AI_HTTP_OPEN_TIMEOUT_SECONDS") { 20 }.to_i.clamp(3, 120)
  HTTP_READ_TIMEOUT_SECONDS = ENV.fetch("LOCAL_AI_HTTP_READ_TIMEOUT_SECONDS") { 120 }.to_i.clamp(10, 600)

  # Upload size bounds in bytes (defaults: images 128 B to 20 MiB, videos 1 KiB to 80 MiB).
  MAX_IMAGE_UPLOAD_BYTES = ENV.fetch("LOCAL_AI_MAX_IMAGE_UPLOAD_BYTES") { 20 * 1024 * 1024 }.to_i
  MAX_VIDEO_UPLOAD_BYTES = ENV.fetch("LOCAL_AI_MAX_VIDEO_UPLOAD_BYTES") { 80 * 1024 * 1024 }.to_i
  MIN_IMAGE_UPLOAD_BYTES = ENV.fetch("LOCAL_AI_MIN_IMAGE_UPLOAD_BYTES") { 128 }.to_i
  MIN_VIDEO_UPLOAD_BYTES = ENV.fetch("LOCAL_AI_MIN_VIDEO_UPLOAD_BYTES") { 1024 }.to_i
  # @param service_url [String, nil] endpoint override; BASE_URL is used when nil/false
  def initialize(service_url: nil)
    @base_url = service_url ? service_url : BASE_URL
  end
  # Checks the service's /health endpoint.
  #
  # Never raises for StandardError failures: any such error (including an
  # unhealthy status) is reported as `{ ok: false, message: ... }`.
  #
  # @return [Hash] `{ ok: true, message:, services: }` or `{ ok: false, message: }`
  def test_connection!
    health = get_json("/health")
    healthy = health["status"] == "healthy"
    raise "Local AI service unavailable" unless healthy

    { ok: true, message: "Local AI service is healthy", services: health["services"] }
  rescue StandardError => e
    { ok: false, message: e.message.to_s }
  end
  # Uploads raw image bytes to the service's /analyze/image endpoint and
  # returns the response converted by convert_vision_response.
  #
  # The bytes are validated, written to a temporary .jpg file for the upload,
  # and the file is always removed afterwards. `usage_category` and
  # `usage_context` are accepted but not used in this method.
  #
  # @param bytes [String] image data (converted to a binary string)
  # @param features [Object] feature names, translated by convert_features
  # @return [Object] result of convert_vision_response
  def analyze_image_bytes!(bytes, features:, usage_category: "image_analysis", usage_context: nil)
    image_data = bytes.to_s.b
    validate_image_bytes!(image_data)

    # Translate feature names into the ones the microservice expects.
    service_features = convert_features(features)

    upload_source = Tempfile.new(["image_analysis", ".jpg"])
    begin
      upload_source.binmode
      upload_source.write(image_data)
      upload_source.flush

      raw_result = upload_file("/analyze/image", upload_source.path, { features: service_features.join(",") })
      convert_vision_response(raw_result)
    ensure
      # Close and delete the temporary file whether or not the upload succeeded.
      upload_source.close!
    end
  end
  # Downloads an image over HTTP(S) and analyzes it via analyze_image_bytes!.
  #
  # Only absolute http/https URLs with a host are accepted. Without this check
  # a relative or non-HTTP URL yields a nil host, which Net::HTTP would treat
  # as a request to the local machine.
  # NOTE(review): the URL may come from untrusted content; the host itself is
  # not restricted here, so internal addresses remain reachable — confirm
  # whether an allow-list is needed. Redirects are not followed.
  #
  # @param url [String] absolute http(s) URL of the image
  # @param features [Object] forwarded to analyze_image_bytes!
  # @raise [ArgumentError] when the URL is not an absolute http(s) URL
  # @raise [RuntimeError] when the download does not return a 2xx response
  # @return [Object] result of analyze_image_bytes!
  def analyze_image_uri!(url, features:, usage_category: "image_analysis", usage_context: nil)
    uri = URI.parse(url)
    # URI::HTTPS is a subclass of URI::HTTP, so this admits both schemes.
    unless uri.is_a?(URI::HTTP) && !uri.host.to_s.empty?
      raise ArgumentError, "Image URL must be an absolute http(s) URL"
    end

    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = uri.scheme == "https"
    http.open_timeout = HTTP_OPEN_TIMEOUT_SECONDS
    # Downloads get at most 90 seconds even when the service read timeout is longer.
    http.read_timeout = [HTTP_READ_TIMEOUT_SECONDS, 90].min

    response = http.get(uri.request_uri)
    raise "Failed to download image: #{response.code}" unless response.is_a?(Net::HTTPSuccess)

    analyze_image_bytes!(response.body, features: features, usage_category: usage_category, usage_context: usage_context)
  end
  # Uploads raw video bytes to the service's /analyze/video endpoint and
  # returns the response converted by convert_video_response.
  #
  # The bytes are validated, written to a temporary .mp4 file for the upload,
  # and the file is always removed afterwards. `usage_context` is accepted
  # but not used in this method.
  #
  # @param bytes [String] video data (converted to a binary string)
  # @param features [Object] feature names, translated by convert_video_features
  # @return [Object] result of convert_video_response
  def analyze_video_bytes!(bytes, features:, usage_context: nil)
    video_data = bytes.to_s.b
    validate_video_bytes!(video_data)
    service_features = convert_video_features(features)

    upload_source = Tempfile.new(["video_analysis", ".mp4"])
    begin
      upload_source.binmode
      upload_source.write(video_data)
      upload_source.flush

      upload_params = {
        features: service_features.join(","),
        sample_rate: 2 # Sample every 2 seconds
      }
      raw_result = upload_file("/analyze/video", upload_source.path, upload_params)
      convert_video_response(raw_result)
    ensure
      # Close and delete the temporary file whether or not the upload succeeded.
      upload_source.close!
    end
  end
  # Returns an already-completed operation stub.
  #
  # The local microservice answers synchronously, so there is never a pending
  # operation to poll; both arguments are accepted only for interface compatibility.
  #
  # @param name [Object] ignored
  # @param usage_context [Object, nil] ignored
  # @return [Hash] a "done" operation with a single empty annotation result
  def fetch_video_operation!(name, usage_context: nil)
    empty_annotations = { "annotationResults" => [{}] }
    { "done" => true, "response" => empty_annotations }
  end
  # Generates text with Ollama and tries to interpret the output as JSON.
  #
  # @param model [String, nil] model name passed to Ai::OllamaClient#generate
  # @param prompt [String] prompt text
  # @param temperature [Numeric] sampling temperature
  # @param max_output_tokens [Integer] passed to the client as max_tokens
  # @param usage_category [String] accepted for interface compatibility; not read here
  # @param usage_context [Object, nil] accepted for interface compatibility; not read here
  # @return [Hash] { raw:, text:, json:, usage: { input_tokens:, output_tokens:, total_tokens: } };
  #   :json is nil when the generated text is missing or is not valid JSON
  def generate_text_json!(model:, prompt:, temperature: 0.8, max_output_tokens: 900, usage_category: "text_generation", usage_context: nil)
    response = Ai::OllamaClient.new.generate(
      model: model,
      prompt: prompt,
      temperature: temperature,
      max_tokens: max_output_tokens
    )
    text = response["response"]

    # Only "not JSON" (ParserError) and "no text at all" (TypeError on nil) are expected
    # here; any other failure is a real bug and is allowed to propagate.
    parsed =
      begin
        JSON.parse(text)
      rescue JSON::ParserError, TypeError
        nil
      end

    input_tokens = response["prompt_eval_count"] || 0
    output_tokens = response["eval_count"] || 0

    {
      raw: response,
      text: text,
      json: parsed,
      usage: {
        input_tokens: input_tokens,
        output_tokens: output_tokens,
        total_tokens: input_tokens + output_tokens
      }
    }
  end
  108. # Returns normalized payload for local story intelligence extraction.
  109. # Expected keys:
  110. # - faces: [{ confidence:, bounding_box:, landmarks:, likelihoods: {} }]
  111. # - ocr_text: "..."
  112. # - ocr_blocks: [{ text:, confidence:, bbox:, source: }]
  113. # - content_labels: ["person", "beach", ...]
  114. # - object_detections: [{ label:, confidence:, bbox: }]
  115. # - location_tags: []
  116. # - mentions: ["@user"]
  117. # - hashtags: ["#tag"]
  # Runs label, OCR and face detection on a still image via /analyze/image and
  # returns the normalized, string-keyed story-intelligence hash.
  #
  # If the combined "labels,text,faces" request fails for any reason, the request is
  # repeated without OCR ("labels,faces") and the failure is recorded as a single
  # warning hash under metadata.warnings.
  #
  # @param image_bytes [#to_s] raw image data
  # @param usage_context [#to_h, nil] echoed back under metadata.usage_context
  # @return [Hash] faces, ocr_text, ocr_blocks, location_tags, content_labels,
  #   object_detections, mentions, hashtags, profile_handles and metadata
  # @raise [ArgumentError] from validate_image_bytes!
  def detect_faces_and_ocr!(image_bytes:, usage_context: nil)
    bytes_data = image_bytes.to_s.b
    validate_image_bytes!(bytes_data)

    temp_file = Tempfile.new(["story_intel", ".jpg"])
    begin
      temp_file.binmode
      temp_file.write(bytes_data)
      temp_file.flush

      ocr_warning = nil
      begin
        response = upload_file("/analyze/image", temp_file.path, { features: "labels,text,faces" })
        payload, results = unpack_response_payload!(
          response: response,
          operation: "detect_faces_and_ocr",
          expected_keys: %w[labels text faces]
        )
      rescue StandardError => e
        # Best effort: remember why the full request failed, then retry without OCR.
        ocr_warning = {
          "feature" => "text",
          "error_class" => e.class.name.to_s,
          "error_message" => e.message.to_s.byteslice(0, 260),
          "fallback" => "labels_faces_only"
        }
        fallback_response = upload_file("/analyze/image", temp_file.path, { features: "labels,faces" })
        payload, results = unpack_response_payload!(
          response: fallback_response,
          operation: "detect_faces_without_text",
          expected_keys: %w[labels faces]
        )
      end

      # Each OCR row is either a hash from the service or a bare value treated as text.
      ocr_blocks = Array(results["text"])
        .map do |row|
          if row.is_a?(Hash)
            source_name = row["source"].to_s.presence || "ocr"
            variant_name = row["variant"].to_s.presence
            bbox = normalize_bounding_box(row["bbox"])
            {
              "text" => row["text"].to_s.strip,
              "confidence" => row["confidence"].to_f,
              "bbox" => bbox.is_a?(Hash) ? bbox : {},
              "source" => [source_name, variant_name].compact.join(":"),
              "variant" => variant_name
            }
          else
            { "text" => row.to_s.strip, "confidence" => 0.0, "bbox" => {}, "source" => "ocr", "variant" => nil }
          end
        end
        .reject { |row| row["text"].blank? }
        .first(80)
      ocr_text = ocr_blocks.map { |row| row["text"] }.uniq.join("\n").presence

      object_detections = Array(results["labels"])
        .map do |row|
          if row.is_a?(Hash)
            {
              "label" => (row["label"] || row["description"]).to_s,
              "confidence" => (row["confidence"] || row["score"]).to_f,
              "bbox" => normalize_bounding_box(row["bbox"])
            }
          else
            { "label" => row.to_s, "confidence" => nil, "bbox" => {} }
          end
        end
        .reject { |row| row["label"].blank? }
        .first(80)
      labels = object_detections.filter_map { |row| row["label"].to_s.strip.presence }.uniq.first(40)

      faces = Array(results["faces"]).map { |face| normalize_face(face) }

      mentions = ocr_text.to_s.scan(/@[a-zA-Z0-9._]+/).map(&:downcase).uniq.first(40)
      hashtags = ocr_text.to_s.scan(/#[a-zA-Z0-9_]+/).map(&:downcase).uniq.first(40)
      # Handle-like tokens: 3-30 allowed characters containing "_" or ".", excluding
      # anything that contains the instagram.com domain.
      profile_handles = ocr_blocks
        .flat_map { |row| row["text"].to_s.scan(/\b[a-zA-Z0-9._]{3,30}\b/) }
        .map(&:downcase)
        .select { |token| token.include?("_") || token.include?(".") }
        .reject { |token| token.include?("instagram.com") }
        .uniq
        .first(40)

      # Kernel#Array would explode the warning hash into [key, value] pairs, so the
      # hash is wrapped explicitly (and dropped when there was no fallback).
      warnings = (Array(payload.dig("metadata", "warnings")) + [ocr_warning].compact).first(20)

      {
        "faces" => faces,
        "ocr_text" => ocr_text,
        "ocr_blocks" => ocr_blocks,
        "location_tags" => [],
        "content_labels" => labels,
        "object_detections" => object_detections,
        "mentions" => mentions,
        "hashtags" => hashtags,
        "profile_handles" => profile_handles,
        "metadata" => {
          "source" => "local_microservice",
          "usage_context" => usage_context.to_h,
          "warnings" => warnings
        }
      }
    ensure
      temp_file.close
      temp_file.unlink
    end
  end
  232. # Returns normalized story intelligence from /analyze/video.
  233. # - scenes: [{ timestamp:, type:, correlation: }]
  234. # - content_labels: [..]
  235. # - object_detections: [{ label:, confidence:, timestamps: [] }]
  236. # - ocr_text / ocr_blocks
  237. # - faces: [{ first_seen:, last_seen:, detection_count: }]
  238. # - mentions / hashtags
  # Uploads a video to /analyze/video and normalizes the reply into a string-keyed
  # story-intelligence hash (scenes, labels, OCR, faces, mentions, hashtags, metadata).
  #
  # Rows in the service reply that are not hashes are ignored.
  #
  # @param video_bytes [#to_s] raw video data
  # @param sample_rate [Integer] sent to the service after clamping to 1..5
  # @param usage_context [#to_h, nil] echoed back under metadata.usage_context
  # @return [Hash]
  def analyze_video_story_intelligence!(video_bytes:, sample_rate: 2, usage_context: nil)
    video_data = video_bytes.to_s.b
    validate_video_bytes!(video_data)

    spool = Tempfile.new(["story_video_intel", ".mp4"])
    begin
      spool.binmode
      spool.write(video_data)
      spool.flush

      service_reply = upload_file(
        "/analyze/video",
        spool.path,
        { features: "labels,faces,scenes,text", sample_rate: sample_rate.to_i.clamp(1, 5) }
      )
      payload, results = unpack_response_payload!(
        response: service_reply,
        operation: "analyze_video_story_intelligence",
        expected_keys: %w[labels faces scenes text]
      )

      # Only hash-shaped rows carry usable data.
      hash_rows = ->(key) { Array(results[key]).select { |entry| entry.is_a?(Hash) } }

      scenes = hash_rows.call("scenes").first(80).map do |scene|
        {
          "timestamp" => scene["timestamp"],
          "type" => scene["type"].to_s.presence || "scene_change",
          "correlation" => scene["correlation"]
        }.compact
      end

      object_detections = hash_rows.call("labels").filter_map do |entry|
        name = (entry["label"] || entry["description"]).to_s.strip
        next if name.blank?

        {
          "label" => name,
          "confidence" => (entry["max_confidence"] || entry["confidence"]).to_f,
          "timestamps" => Array(entry["timestamps"]).map(&:to_f).first(80)
        }
      end.first(80)
      content_labels = object_detections.map { |detection| detection["label"].to_s.downcase }.uniq.first(50)

      ocr_blocks = hash_rows.call("text").filter_map do |entry|
        snippet = entry["text"].to_s.strip
        next if snippet.blank?

        {
          "text" => snippet,
          "confidence" => entry["confidence"].to_f,
          "timestamp" => entry["timestamp"],
          "bbox" => normalize_bounding_box(entry["bbox"]),
          "source" => entry["source"].to_s.presence || "ocr_video"
        }.compact
      end.first(120)
      ocr_text = ocr_blocks.map { |block| block["text"] }.uniq.join("\n").presence

      faces = hash_rows.call("faces").first(60).map do |entry|
        {
          "first_seen" => entry["first_seen"],
          "last_seen" => entry["last_seen"],
          "detection_count" => entry["detection_count"].to_i
        }.compact
      end

      searchable_text = ocr_text.to_s
      mentions = searchable_text.scan(/@[a-zA-Z0-9._]+/).map(&:downcase).uniq.first(40)
      hashtags = searchable_text.scan(/#[a-zA-Z0-9_]+/).map(&:downcase).uniq.first(40)

      metadata = {
        "source" => "local_microservice_video",
        "usage_context" => usage_context.to_h,
        "warnings" => Array(payload.dig("metadata", "warnings")).first(20)
      }

      {
        "scenes" => scenes,
        "content_labels" => content_labels,
        "object_detections" => object_detections,
        "ocr_text" => ocr_text,
        "ocr_blocks" => ocr_blocks,
        "faces" => faces,
        "mentions" => mentions,
        "hashtags" => hashtags,
        "metadata" => metadata
      }
    ensure
      spool.close
      spool.unlink
    end
  end
  318. private
  # Translates Google Vision feature names into the local service's feature names.
  #
  # Accepts plain names (String or Symbol) and hashes carrying the name under
  # :type / "type". Unknown names are dropped and duplicates removed.
  #
  # @param google_features [Array, Hash, String, Symbol, nil] one feature or a list of them
  # @return [Array<String>] subset of "labels", "text", "faces" in first-seen order
  def convert_features(google_features)
    feature_map = {
      "LABEL_DETECTION" => "labels",
      "TEXT_DETECTION" => "text",
      "FACE_DETECTION" => "faces"
    }

    # A single hash must be wrapped by hand: Kernel#Array would split it into pairs.
    features = google_features.is_a?(Hash) ? [google_features] : Array(google_features)

    features.filter_map do |feature|
      feature_type = feature.is_a?(Hash) ? (feature[:type] || feature["type"]) : feature
      # to_s lets symbol names (including symbol values inside hashes) match the map.
      feature_map[feature_type.to_s]
    end.uniq
  end
  # Translates Google Video Intelligence feature names into the local service's
  # feature names.
  #
  # Accepts the same input shapes as convert_features: plain names (String or
  # Symbol) and hashes carrying the name under :type / "type". Unknown names are
  # dropped and duplicates removed.
  #
  # @param google_features [Array, Hash, String, Symbol, nil] one feature or a list of them
  # @return [Array<String>] subset of "labels", "scenes", "faces" in first-seen order
  def convert_video_features(google_features)
    feature_map = {
      "LABEL_DETECTION" => "labels",
      "SHOT_CHANGE_DETECTION" => "scenes",
      "FACE_DETECTION" => "faces",
      "EXPLICIT_CONTENT_DETECTION" => "labels" # explicit content is served by the labels feature
    }

    # A single hash must be wrapped by hand: Kernel#Array would split it into pairs.
    features = google_features.is_a?(Hash) ? [google_features] : Array(google_features)

    features.filter_map do |feature|
      feature_type = feature.is_a?(Hash) ? (feature[:type] || feature["type"]) : feature
      feature_map[feature_type.to_s]
    end.uniq
  end
  # Reshapes an /analyze/image reply into Google-Vision-style annotation lists.
  #
  # Only the sections present in the reply are emitted: "labelAnnotations",
  # "textAnnotations" and "faceAnnotations". Rows that are not hashes are treated
  # as bare label/text values (labels, text) or as empty rows (faces).
  #
  # @param response [Hash] raw reply from upload_file
  # @return [Hash] Vision-style annotations
  def convert_vision_response(response)
    _payload, results = unpack_response_payload!(
      response: response,
      operation: "analyze_image",
      expected_keys: %w[labels text faces]
    )

    annotations = {}

    if results.key?("labels")
      annotations["labelAnnotations"] = Array(results["labels"]).map do |item|
        structured = item.is_a?(Hash)
        name = structured ? (item["label"] || item["description"]) : item
        score = structured ? (item["confidence"] || item["score"]) : nil
        # The service reports a single confidence, reused for both Vision fields.
        { "description" => name.to_s, "score" => score, "topicality" => score }
      end
    end

    if results.key?("text")
      annotations["textAnnotations"] = Array(results["text"]).map do |item|
        row = item.is_a?(Hash) ? item : { "text" => item.to_s, "confidence" => nil, "bbox" => nil }
        vertices = convert_bbox_to_vertices(row["bbox"])
        {
          "description" => row["text"].to_s,
          "confidence" => row["confidence"],
          "boundingPoly" => { "vertices" => vertices }
        }
      end
    end

    if results.key?("faces")
      annotations["faceAnnotations"] = Array(results["faces"]).map do |item|
        row = item.is_a?(Hash) ? item : {}
        {
          "boundingPoly" => { "vertices" => convert_bbox_to_vertices(row["bbox"] || row["bounding_box"]) },
          "confidence" => row["confidence"],
          "landmarks" => convert_landmarks(row["landmarks"])
        }
      end
    end

    annotations
  end
  # Reshapes an /analyze/video reply into a Google-Video-Intelligence-style hash
  # with a single annotation result.
  #
  # "segmentLabelAnnotations" is emitted when the reply has "labels" and
  # "shotAnnotations" when it has "scenes". Timestamps are rendered as whole
  # seconds ("<n>s").
  #
  # @param response [Hash] raw reply from upload_file
  # @return [Hash] { "annotationResults" => [ { ... } ] }
  def convert_video_response(response)
    _payload, results = unpack_response_payload!(
      response: response,
      operation: "analyze_video",
      expected_keys: %w[labels scenes faces]
    )

    annotation = {}

    if results.key?("labels")
      annotation["segmentLabelAnnotations"] = Array(results["labels"]).map do |item|
        # Bare values become a label with no confidence and no timestamps.
        row = item.is_a?(Hash) ? item : { "label" => item.to_s, "max_confidence" => 0.0, "timestamps" => [] }
        segments = Array(row["timestamps"]).map do |timestamp|
          { "segment" => { "startTimeOffset" => "#{timestamp.to_i}s" } }
        end
        {
          "entity" => {
            "description" => (row["label"] || row["description"]).to_s,
            "confidence" => (row["max_confidence"] || row["confidence"]).to_f
          },
          "segments" => segments
        }
      end
    end

    if results.key?("scenes")
      annotation["shotAnnotations"] = Array(results["scenes"]).map do |item|
        row = item.is_a?(Hash) ? item : {}
        { "startTimeOffset" => "#{row["timestamp"].to_i}s" }
      end
    end

    { "annotationResults" => [annotation] }
  end
  # Converts a bounding box into a list of integer { "x", "y" } vertices.
  #
  # Supported shapes:
  # - four points [[x, y], ...]: each point is converted as-is
  # - flat [x1, y1, x2, y2]: expanded clockwise from the top-left corner
  # - hash with x1/y1/x2/y2 or left/top/right/bottom (string or symbol keys)
  # Anything else, including nil, yields an empty list.
  #
  # @param bbox [Array, Hash, nil]
  # @return [Array<Hash>]
  def convert_bbox_to_vertices(bbox)
    rectangle = lambda do |left, top, right, bottom|
      [
        { "x" => left, "y" => top },
        { "x" => right, "y" => top },
        { "x" => right, "y" => bottom },
        { "x" => left, "y" => bottom }
      ]
    end

    case bbox
    when Array
      return [] unless bbox.length == 4

      if bbox.first.is_a?(Array)
        bbox.map { |point| { "x" => point[0].to_i, "y" => point[1].to_i } }
      else
        rectangle.call(bbox[0].to_i, bbox[1].to_i, bbox[2].to_i, bbox[3].to_i)
      end
    when Hash
      # First truthy value among the candidate keys, in the given order.
      pick = ->(*keys) { keys.reduce(nil) { |found, key| found || bbox[key] } }
      left = pick.call("x1", :x1, "left", :left).to_f
      top = pick.call("y1", :y1, "top", :top).to_f
      right = pick.call("x2", :x2, "right", :right).to_f
      bottom = pick.call("y2", :y2, "bottom", :bottom).to_f
      rectangle.call(left.to_i, top.to_i, right.to_i, bottom.to_i)
    else
      []
    end
  end
  # Normalizes one face row from the service into a string-keyed hash with
  # "confidence", "bounding_box", "landmarks" and "likelihoods".
  #
  # Both string and symbol keys are understood; a non-hash row is treated as empty.
  #
  # @param face [Hash, Object]
  # @return [Hash]
  def normalize_face(face)
    attrs = face.is_a?(Hash) ? face : {}
    # First truthy value among the candidate keys, in the given order.
    pick = ->(*keys) { keys.reduce(nil) { |found, key| found || attrs[key] } }

    box_source = pick.call("bounding_box", "bbox", :bounding_box, :bbox)
    landmark_source = pick.call("landmarks", :landmarks)

    {
      "confidence" => pick.call("confidence", :confidence).to_f,
      "bounding_box" => normalize_bounding_box(box_source),
      "landmarks" => normalize_landmarks(landmark_source),
      "likelihoods" => pick.call("likelihoods", :likelihoods) || {}
    }
  end
  # Normalizes a bounding box into a hash.
  #
  # - flat [x1, y1, x2, y2] (numeric first element) maps straight onto the keys
  # - four points [[x, y], ...] collapse into their min/max extent (as floats)
  # - a hash is returned unchanged
  # - anything else yields {}
  #
  # @param value [Array, Hash, Object]
  # @return [Hash]
  def normalize_bounding_box(value)
    case value
    when Hash
      value
    when Array
      return {} unless value.length == 4

      head = value.first
      if head.is_a?(Numeric)
        %w[x1 y1 x2 y2].zip(value).to_h
      elsif head.is_a?(Array)
        xs = value.map { |point| point[0].to_f }
        ys = value.map { |point| point[1].to_f }
        { "x1" => xs.min, "y1" => ys.min, "x2" => xs.max, "y2" => ys.max }
      else
        {}
      end
    else
      {}
    end
  end
  # Normalizes up to 24 landmarks into { "type", "x", "y", "z" } hashes.
  #
  # Hash landmarks may carry coordinates at the top level or under "position";
  # array landmarks are read positionally as [x, y, z]. Other entries are dropped.
  #
  # @param value [Array, Object, nil]
  # @return [Array<Hash>]
  def normalize_landmarks(value)
    Array(value).first(24).filter_map do |item|
      case item
      when Hash
        coordinates = %w[x y z].to_h { |axis| [axis, item[axis] || item.dig("position", axis)] }
        { "type" => item["type"].to_s.presence || "UNKNOWN" }.merge(coordinates)
      when Array
        x, y, z = item
        { "type" => "UNKNOWN", "x" => x, "y" => y, "z" => z }
      end
    end
  end
  # Converts service landmarks into Vision-style { "type", "position" => { x, y, z } }
  # entries with integer coordinates.
  #
  # Hash landmarks may carry coordinates at the top level (string or symbol keys) or
  # under a "position" hash. Array landmarks are read positionally as [x, y, z].
  # Any other entry (nil, numbers, strings) becomes an UNKNOWN_LANDMARK at the
  # origin instead of raising or being misread.
  #
  # @param landmarks [Array, nil]
  # @return [Array<Hash>]
  def convert_landmarks(landmarks)
    return [] unless landmarks

    # Coordinates that cannot be converted count as 0 rather than aborting the response.
    via_float = ->(value) { value.respond_to?(:to_f) ? value.to_f.to_i : 0 }
    via_int = ->(value) { value.respond_to?(:to_i) ? value.to_i : 0 }

    landmarks.map do |landmark|
      if landmark.is_a?(Hash)
        position = landmark["position"].is_a?(Hash) ? landmark["position"] : {}
        x = landmark["x"] || landmark[:x] || position["x"]
        y = landmark["y"] || landmark[:y] || position["y"]
        z = landmark["z"] || landmark[:z] || position["z"]
        {
          "type" => (landmark["type"] || landmark[:type] || "UNKNOWN_LANDMARK").to_s,
          "position" => { "x" => via_float.call(x), "y" => via_float.call(y), "z" => via_float.call(z) }
        }
      else
        # Only real arrays are indexed: Integer#[] is bit access and nil has no #[].
        coords = landmark.is_a?(Array) ? landmark : []
        {
          "type" => "UNKNOWN_LANDMARK", # positional landmarks carry no type information
          "position" => {
            "x" => via_int.call(coords[0]),
            "y" => via_int.call(coords[1]),
            "z" => via_int.call(coords[2])
          }
        }
      end
    end
  end
  # Performs a GET against the local AI service and returns the parsed JSON body.
  #
  # TLS is enabled when the configured base URL uses https, matching how
  # analyze_image_uri! configures its connection.
  #
  # @param endpoint [String] path appended to @base_url
  # @return [Object] parsed JSON body of a 2xx response
  # @raise [RuntimeError] on a non-2xx response or a body that is not valid JSON
  def get_json(endpoint)
    uri = URI.parse("#{@base_url}#{endpoint}")
    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = uri.scheme == "https"
    http.open_timeout = HTTP_OPEN_TIMEOUT_SECONDS
    http.read_timeout = HTTP_READ_TIMEOUT_SECONDS

    request = Net::HTTP::Get.new(uri.request_uri)
    request["Accept"] = "application/json"
    response = http.request(request)

    # An empty body is treated as an empty JSON object.
    body = JSON.parse(response.body.to_s.presence || "{}")
    return body if response.is_a?(Net::HTTPSuccess)

    error = extract_http_error_message(body: body, raw_body: response.body)
    raise "Local AI service error: HTTP #{response.code} #{response.message} - #{error}"
  rescue JSON::ParserError
    raise "Local AI service error: HTTP #{response.code} #{response.message} - #{response.body.to_s.byteslice(0, 500)}"
  end
  # Uploads a file to the local AI service as multipart/form-data and returns the
  # parsed JSON reply.
  #
  # The file goes in the "file" part; every entry of +params+ becomes an additional
  # form field. The file is read in binary mode and the body is assembled as a
  # binary string, so media bytes are never newline-translated or mixed with
  # text-encoded parts. TLS is enabled when the base URL uses https.
  #
  # @param endpoint [String] path appended to @base_url
  # @param file_path [String] path of the file to upload
  # @param params [Hash] extra form fields (values are sent via to_s)
  # @return [Object] parsed JSON body of a 2xx response
  # @raise [RuntimeError] on a non-2xx response or a body that is not valid JSON
  def upload_file(endpoint, file_path, params = {})
    uri = URI.parse("#{@base_url}#{endpoint}")
    boundary = "----WebKitFormBoundary#{SecureRandom.hex(16)}"

    post_body = String.new(encoding: Encoding::BINARY)
    append_text = ->(part) { post_body << part.to_s.b }

    # File part
    append_text.call("--#{boundary}\r\n")
    append_text.call("Content-Disposition: form-data; name=\"file\"; filename=\"#{File.basename(file_path)}\"\r\n")
    append_text.call("Content-Type: application/octet-stream\r\n\r\n")
    post_body << File.binread(file_path) # already binary; appended without an extra copy
    append_text.call("\r\n")

    # Plain form fields
    params.each do |key, value|
      append_text.call("--#{boundary}\r\n")
      append_text.call("Content-Disposition: form-data; name=\"#{key}\"\r\n\r\n")
      append_text.call(value)
      append_text.call("\r\n")
    end
    append_text.call("--#{boundary}--\r\n")

    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = uri.scheme == "https"
    http.open_timeout = HTTP_OPEN_TIMEOUT_SECONDS
    http.read_timeout = HTTP_READ_TIMEOUT_SECONDS

    request = Net::HTTP::Post.new(uri.request_uri)
    request["Content-Type"] = "multipart/form-data; boundary=#{boundary}"
    request["Accept"] = "application/json"
    request.body = post_body
    response = http.request(request)

    # An empty body is treated as an empty JSON object.
    body = JSON.parse(response.body.to_s.presence || "{}")
    return body if response.is_a?(Net::HTTPSuccess)

    error = extract_http_error_message(body: body, raw_body: response.body)
    raise "Local AI service error: HTTP #{response.code} #{response.message} - #{error}"
  rescue JSON::ParserError
    raise "Local AI service error: HTTP #{response.code} #{response.message} - #{response.body.to_s.byteslice(0, 500)}"
  end
  # Splits a service reply into the full payload and its results section.
  #
  # The payload is the reply with all keys stringified ({} when the reply is not a
  # hash). The results are payload["results"] when that is a hash, otherwise the
  # payload itself. A reply is rejected only when it carries a falsy "success" flag
  # AND none of the expected result keys; partial results are accepted as they are.
  #
  # @param response [Object] raw reply, normally a Hash
  # @param operation [String] operation name used in the error message
  # @param expected_keys [Array<String, Symbol>] result keys that count as usable output
  # @return [Array(Hash, Hash)] [payload, results]
  # @raise [RuntimeError] when the reply reports failure and holds no expected key
  def unpack_response_payload!(response:, operation:, expected_keys:)
    payload = response.is_a?(Hash) ? deep_stringify_hash(response) : {}
    results = payload["results"].is_a?(Hash) ? payload["results"] : payload

    explicit_failure = payload.key?("success") && !ActiveModel::Type::Boolean.new.cast(payload["success"])
    has_expected_keys = Array(expected_keys).map(&:to_s).any? { |key| results.key?(key) }

    if explicit_failure && !has_expected_keys
      raise "Local AI #{operation} failed: #{response_error_message(payload)}"
    end

    [ payload, results ]
  end
  # Picks the most specific error text out of a failed service payload.
  #
  # Checked in order: error.message (when "error" is a hash), "error", "message",
  # "detail". Blank values are skipped; "unknown error" is the last resort.
  #
  # @param payload [Hash, Object]
  # @return [String]
  def response_error_message(payload)
    fallback = "unknown error"
    return fallback unless payload.is_a?(Hash)

    error_field = payload["error"]
    nested_message = error_field["message"] if error_field.is_a?(Hash)

    [nested_message, error_field, payload["message"], payload["detail"]].each do |candidate|
      text = candidate.to_s.presence
      return text if text
    end

    fallback
  end
  # Ensures image data is present and its size lies within
  # MIN_IMAGE_UPLOAD_BYTES..MAX_IMAGE_UPLOAD_BYTES.
  #
  # @param bytes [String] raw image data
  # @return [nil]
  # @raise [ArgumentError] "image_bytes_missing", "image_bytes_too_small" or "image_bytes_too_large"
  def validate_image_bytes!(bytes)
    raise ArgumentError, "image_bytes_missing" if bytes.blank?

    size = bytes.bytesize
    if size < MIN_IMAGE_UPLOAD_BYTES
      raise ArgumentError, "image_bytes_too_small"
    elsif size > MAX_IMAGE_UPLOAD_BYTES
      raise ArgumentError, "image_bytes_too_large"
    end
  end
  # Ensures video data is present and its size lies within
  # MIN_VIDEO_UPLOAD_BYTES..MAX_VIDEO_UPLOAD_BYTES.
  #
  # @param bytes [String] raw video data
  # @return [nil]
  # @raise [ArgumentError] "video_bytes_missing", "video_bytes_too_small" or "video_bytes_too_large"
  def validate_video_bytes!(bytes)
    raise ArgumentError, "video_bytes_missing" if bytes.blank?

    size = bytes.bytesize
    if size < MIN_VIDEO_UPLOAD_BYTES
      raise ArgumentError, "video_bytes_too_small"
    elsif size > MAX_VIDEO_UPLOAD_BYTES
      raise ArgumentError, "video_bytes_too_large"
    end
  end
  # Picks the most specific error text out of a non-2xx HTTP reply.
  #
  # Checked in order: error.message (when "error" is a hash), "error", "message",
  # then "detail" (its "message" when it is a hash). Blank values are skipped.
  # When nothing matches, the first 500 bytes of the raw body are returned.
  #
  # @param body [Hash, Object] parsed JSON body
  # @param raw_body [#to_s] unparsed response body
  # @return [String]
  def extract_http_error_message(body:, raw_body:)
    data = body.is_a?(Hash) ? body : {}

    error_field = data["error"]
    nested_message = error_field["message"] if error_field.is_a?(Hash)

    detail_field = data["detail"]
    detail_source = detail_field.is_a?(Hash) ? detail_field["message"] : detail_field

    [nested_message, error_field, data["message"], detail_source].each do |candidate|
      text = candidate.to_s.presence
      return text if text
    end

    raw_body.to_s.byteslice(0, 500)
  end
  # Returns a copy of +value+ in which every hash key, at any nesting depth inside
  # hashes and arrays, has been converted with to_s. Other values are returned as-is.
  #
  # @param value [Object]
  # @return [Object]
  def deep_stringify_hash(value)
    if value.is_a?(Hash)
      value.to_h { |key, child| [key.to_s, deep_stringify_hash(child)] }
    elsif value.is_a?(Array)
      value.map { |child| deep_stringify_hash(child) }
    else
      value
    end
  end
  634. end
  635. end

app/services/ai/ollama_client.rb

0.0% lines covered

100.0% branches covered

109 relevant lines. 0 lines covered and 109 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "net/http"
  2. require "json"
  module Ai
    # Small HTTP client for an Ollama server.
    #
    # Configuration comes from the environment (OLLAMA_URL, OLLAMA_MODEL,
    # OLLAMA_OPEN_TIMEOUT_SECONDS, OLLAMA_READ_TIMEOUT_SECONDS, OLLAMA_KEEP_ALIVE)
    # and can be overridden per instance for the base URL and default model.
    # All requests are non-streaming and exchange JSON.
    class OllamaClient
      BASE_URL = ENV.fetch("OLLAMA_URL", "http://localhost:11434").freeze
      DEFAULT_MODEL = ENV.fetch("OLLAMA_MODEL", "mistral:7b").freeze
      OPEN_TIMEOUT_SECONDS = ENV.fetch("OLLAMA_OPEN_TIMEOUT_SECONDS", "12").to_i.clamp(5, 60)
      READ_TIMEOUT_SECONDS = ENV.fetch("OLLAMA_READ_TIMEOUT_SECONDS", "240").to_i.clamp(30, 600)

      # @param base_url [String, nil] overrides BASE_URL
      # @param default_model [String, nil] overrides DEFAULT_MODEL
      def initialize(base_url: nil, default_model: nil)
        @base_url = base_url || BASE_URL
        @default_model = default_model || DEFAULT_MODEL
      end

      # Checks that the server answers /api/tags.
      #
      # @return [Hash] { ok: true, message:, models:, default_model: } on success,
      #   { ok: false, message: } when any StandardError occurs
      def test_connection!
        models = list_models
        {
          ok: true,
          message: "Ollama is available",
          models: models.map { |m| m["name"] },
          default_model: @default_model
        }
      rescue StandardError => e
        { ok: false, message: e.message.to_s }
      end

      # Runs a single-prompt completion via /api/generate.
      #
      # @param model [String, nil] model name; the instance default is used when nil
      # @param prompt [String]
      # @param temperature [Numeric]
      # @param max_tokens [Integer] sent as options.num_predict
      # @return [Hash] string-keyed subset of the server reply (missing fields are nil)
      # @raise [RuntimeError] on a non-2xx response or a non-JSON body
      def generate(model:, prompt:, temperature: 0.8, max_tokens: 900)
        response = post_json("/api/generate", {
          model: model || @default_model,
          prompt: prompt,
          **generation_settings(temperature, max_tokens)
        })
        pick(response, %w[model response done prompt_eval_count eval_count total_duration load_duration])
      end

      # Runs a chat completion via /api/chat.
      #
      # @param model [String, nil] model name; the instance default is used when nil
      # @param messages [Array<Hash>] chat messages, passed through unchanged
      # @param temperature [Numeric]
      # @param max_tokens [Integer] sent as options.num_predict
      # @return [Hash] string-keyed subset of the server reply (missing fields are nil)
      # @raise [RuntimeError] on a non-2xx response or a non-JSON body
      def chat(model:, messages:, temperature: 0.8, max_tokens: 900)
        response = post_json("/api/chat", {
          model: model || @default_model,
          messages: messages,
          **generation_settings(temperature, max_tokens)
        })
        pick(response, %w[model message done prompt_eval_count eval_count])
      end

      # @return [Array] the "models" list from /api/tags ([] when absent)
      def list_models
        response = get_json("/api/tags")
        response["models"] || []
      end

      # Asks the server to pull a model and waits for the final status.
      #
      # Streaming is switched off so the reply is one JSON object; the default
      # streamed reply is a sequence of JSON lines that cannot be parsed in one go.
      #
      # @param model_name [String]
      # @return [Object] parsed JSON reply
      def pull_model(model_name)
        post_json("/api/pull", { name: model_name, stream: false })
      end

      private

      # Options shared by generate and chat.
      def generation_settings(temperature, max_tokens)
        {
          options: {
            temperature: temperature,
            num_predict: max_tokens
          },
          keep_alive: ENV.fetch("OLLAMA_KEEP_ALIVE", "10m"),
          stream: false
        }
      end

      # Copies the listed keys out of a reply, keeping nil for missing ones.
      def pick(response, keys)
        keys.to_h { |key| [key, response[key]] }
      end

      # GET requests only fetch metadata, so their read timeout is capped at 60 seconds.
      def get_json(endpoint)
        request_json(Net::HTTP::Get, endpoint, read_timeout: [READ_TIMEOUT_SECONDS, 60].min)
      end

      def post_json(endpoint, payload)
        request_json(Net::HTTP::Post, endpoint, read_timeout: READ_TIMEOUT_SECONDS, payload: payload)
      end

      # Sends one JSON request and returns the parsed body of a 2xx response.
      # TLS is enabled when the base URL uses https.
      def request_json(request_class, endpoint, read_timeout:, payload: nil)
        uri = URI.parse("#{@base_url}#{endpoint}")
        http = Net::HTTP.new(uri.host, uri.port)
        http.use_ssl = uri.scheme == "https"
        http.open_timeout = OPEN_TIMEOUT_SECONDS
        http.read_timeout = read_timeout

        request = request_class.new(uri.request_uri)
        request["Accept"] = "application/json"
        unless payload.nil?
          request["Content-Type"] = "application/json"
          request.body = JSON.generate(payload)
        end

        response = http.request(request)
        raw = response.body.to_s
        status = "HTTP #{response.code} #{response.message}"

        body =
          begin
            # An empty body is treated as an empty JSON object.
            JSON.parse(raw.strip.empty? ? "{}" : raw)
          rescue JSON::ParserError
            raise "Ollama error: #{status} - #{raw.byteslice(0, 500)}"
          end
        return body if response.is_a?(Net::HTTPSuccess)

        raise "Ollama error: #{status} - #{error_detail(body, raw)}"
      end

      # The reply's "error" field when it is usable, otherwise the start of the raw body.
      def error_detail(body, raw)
        error = body.is_a?(Hash) ? body["error"] : nil
        unusable = error.nil? || error == false ||
          (error.respond_to?(:empty?) && error.empty?) ||
          (error.is_a?(String) && error.strip.empty?)
        unusable ? raw.byteslice(0, 500) : error
      end
    end
  end

app/services/ai/post_analysis_context_builder.rb

0.0% lines covered

100.0% branches covered

258 relevant lines. 0 lines covered and 258 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "base64"
  2. require "digest"
  3. require "uri"
  4. module Ai
  5. class PostAnalysisContextBuilder
  # Size limits (in bytes unless noted), each overridable through the named
  # environment variable; the second argument is the built-in default.
  mib = 1024 * 1024
  MAX_INLINE_IMAGE_BYTES = ENV.fetch("AI_MAX_INLINE_IMAGE_BYTES", 2 * mib).to_i
  MAX_INLINE_VIDEO_BYTES = ENV.fetch("AI_MAX_INLINE_VIDEO_BYTES", 12 * mib).to_i
  MAX_DIRECT_IMAGE_ANALYSIS_BYTES = ENV.fetch("AI_MAX_DIRECT_IMAGE_ANALYSIS_BYTES", 10 * mib).to_i
  MAX_DIRECT_VIDEO_ANALYSIS_BYTES = ENV.fetch("AI_MAX_DIRECT_VIDEO_ANALYSIS_BYTES", 40 * mib).to_i
  MAX_ABSOLUTE_MEDIA_BYTES = ENV.fetch("AI_MAX_ABSOLUTE_MEDIA_BYTES", 120 * mib).to_i
  MIN_MEDIA_BYTES = ENV.fetch("AI_MIN_MEDIA_BYTES", 512).to_i
  # Taken from AI_IMAGE_RESIZE_MAX_DIMENSION; not a byte count.
  IMAGE_RESIZE_MAX_DIMENSION = ENV.fetch("AI_IMAGE_RESIZE_MAX_DIMENSION", 1920).to_i
  MAX_VIDEO_FRAME_ANALYSIS_BYTES = ENV.fetch("AI_VIDEO_FRAME_MAX_BYTES", 35 * mib).to_i
  # @param profile [Object] profile that authored the post
  # @param post [Object] post whose analysis context is being built
  def initialize(profile:, post:)
    @profile, @post = profile, post
  end

  # Read-only access to the objects given at construction time.
  attr_reader :profile, :post
  # Builds the textual analysis context: post details with its 25 most recent
  # comments, the author's profile summary, and fixed generation rules.
  #
  # @return [Hash] { post:, author_profile:, rules: }
  def payload
    serialize_comment = lambda do |comment|
      {
        author_username: comment.author_username,
        body: comment.body,
        commented_at: comment.commented_at&.iso8601
      }
    end

    post_section = {
      shortcode: post.shortcode,
      caption: post.caption,
      taken_at: post.taken_at&.iso8601,
      permalink: post.permalink_url,
      likes_count: post.likes_count,
      comments_count: post.comments_count,
      comments: post.instagram_profile_post_comments.recent_first.limit(25).map(&serialize_comment)
    }

    author_section = {
      username: profile.username,
      display_name: profile.display_name,
      bio: profile.bio,
      can_message: profile.can_message,
      tags: profile.profile_tags.pluck(:name).sort
    }

    {
      post: post_section,
      author_profile: author_section,
      rules: { require_manual_review: true, style: "gen_z_light" }
    }
  end
  # Describes the post's attached media for analysis.
  #
  # Outcomes, in order of precedence:
  # - a "none" payload with a reason when media is missing, unsupported, empty,
  #   over the absolute size limit, or fails the byte-level checks
  # - a URL payload when the blob exceeds the inline limit for its type and a
  #   source URL is available
  # - a "none" payload for videos over the direct-analysis limit
  # - otherwise a bytes payload (images over the direct-analysis limit are resized
  #   first); images within the inline limit also get a base64 data URL
  # Any StandardError is reported as a "media_payload_error" none payload.
  #
  # @return [Hash]
  def media_payload
    return none_media_payload(reason: "media_missing") unless post.media.attached?

    blob = post.media.blob
    return none_media_payload(reason: "media_blob_missing") unless blob

    content_type = blob.content_type.to_s
    byte_size = blob.byte_size.to_i
    is_image = content_type.start_with?("image/")
    is_video = content_type.start_with?("video/")

    unless is_image || is_video
      return none_media_payload(reason: "unsupported_content_type", content_type: content_type)
    end
    if byte_size <= 0
      return none_media_payload(reason: "zero_byte_blob", content_type: content_type, byte_size: byte_size)
    end
    if byte_size > MAX_ABSOLUTE_MEDIA_BYTES
      return none_media_payload(reason: "media_too_large", content_type: content_type, byte_size: byte_size, max_bytes: MAX_ABSOLUTE_MEDIA_BYTES)
    end

    media_type = is_video ? "video" : "image"
    media_url = normalize_url(post.source_media_url)

    # Exactly one of is_image / is_video holds here, so a single limit applies.
    inline_limit = is_image ? MAX_INLINE_IMAGE_BYTES : MAX_INLINE_VIDEO_BYTES
    if byte_size > inline_limit && media_url.present?
      return url_media_payload(type: media_type, content_type: content_type, url: media_url, byte_size: byte_size)
    end

    if is_video && byte_size > MAX_DIRECT_VIDEO_ANALYSIS_BYTES
      return none_media_payload(
        reason: "video_too_large_for_direct_analysis",
        content_type: content_type,
        byte_size: byte_size,
        max_bytes: MAX_DIRECT_VIDEO_ANALYSIS_BYTES
      )
    end

    needs_resize = is_image && byte_size > MAX_DIRECT_IMAGE_ANALYSIS_BYTES
    data = (needs_resize ? resize_image_blob(blob: blob) : blob.download).to_s.b

    if data.blank?
      return none_media_payload(reason: "media_bytes_missing", content_type: content_type, byte_size: byte_size)
    end
    if data.bytesize < MIN_MEDIA_BYTES
      return none_media_payload(reason: "media_bytes_too_small", content_type: content_type, byte_size: data.bytesize, min_bytes: MIN_MEDIA_BYTES)
    end
    unless valid_signature?(content_type: content_type, bytes: data)
      return none_media_payload(reason: "media_signature_invalid", content_type: content_type, byte_size: data.bytesize)
    end

    result = {
      type: media_type,
      content_type: content_type,
      bytes: data,
      source: needs_resize ? "resized_blob" : "blob",
      byte_size: data.bytesize
    }
    if is_image && data.bytesize <= MAX_INLINE_IMAGE_BYTES
      result[:image_data_url] = "data:#{content_type};base64,#{Base64.strict_encode64(data)}"
    end
    result
  rescue StandardError => e
    none_media_payload(
      reason: "media_payload_error",
      content_type: blob&.content_type.to_s,
      byte_size: blob&.byte_size.to_i,
      error: "#{e.class}: #{e.message}"
    )
  end
  # Returns a stable identifier for the post's media, using the first source
  # that is available:
  # 1. the post's stored media URL fingerprint
  # 2. "blob:<checksum>" of the attached blob
  # 3. SHA-256 of the normalized source media URL
  # 4. SHA-256 of the media bytes (from +media+ or a freshly built media_payload)
  #
  # @param media [Hash, nil] an already-built media payload, to avoid rebuilding it
  # @return [String, nil] nil when none of the sources is available
  def media_fingerprint(media: nil)
    stored = post.media_url_fingerprint.to_s
    return stored if stored.present?

    if post.media.attached?
      blob_checksum = post.media.blob&.checksum.to_s
      return "blob:#{blob_checksum}" if blob_checksum.present?
    end

    source_url = normalize_url(post.source_media_url)
    return Digest::SHA256.hexdigest(source_url) if source_url.present?

    raw_bytes = (media || media_payload)[:bytes]
    raw_bytes.present? ? Digest::SHA256.hexdigest(raw_bytes) : nil
  end
  # Picks the still image to run detection on.
  #
  # - image media: the media itself
  # - video media: the attached preview image when present, otherwise a preview
  #   generated from the video (limited to 960x960); if generation fails the
  #   result is skipped with "video_preview_unavailable"
  # - anything else: skipped with "unsupported_content_type"
  # Any other StandardError yields a skipped result with "media_load_error".
  #
  # @return [Hash] { skipped: false, image_bytes:, detection_source:, content_type: }
  #   or { skipped: true, reason:, ... }
  def detection_image_payload
    return { skipped: true, reason: "media_missing" } unless post.media.attached?

    content_type = post.media.blob&.content_type.to_s

    if content_type.start_with?("image/")
      {
        skipped: false,
        image_bytes: post.media.download,
        detection_source: "post_media_image",
        content_type: content_type
      }
    elsif content_type.start_with?("video/")
      if post.preview_image.attached?
        {
          skipped: false,
          image_bytes: post.preview_image.download,
          detection_source: "post_preview_image",
          content_type: post.preview_image.blob&.content_type.to_s
        }
      else
        begin
          generated_preview = post.media.preview(resize_to_limit: [ 960, 960 ]).processed
          preview_blob = generated_preview.respond_to?(:image) ? generated_preview.image : nil
          {
            skipped: false,
            image_bytes: generated_preview.download,
            detection_source: "post_generated_video_preview",
            content_type: preview_blob&.content_type.to_s.presence || "image/jpeg"
          }
        rescue StandardError
          { skipped: true, reason: "video_preview_unavailable", content_type: content_type }
        end
      end
    else
      { skipped: true, reason: "unsupported_content_type", content_type: content_type }
    end
  rescue StandardError => e
    {
      skipped: true,
      reason: "media_load_error",
      error: e.message.to_s,
      content_type: content_type.to_s
    }
  end
  # Builds the payload handed to video frame analysis.
  #
  # @return [Hash] `skipped: false` with :video_bytes, :content_type and
  #   :reference_id, or `skipped: true` with a :reason when the media is missing,
  #   is not a video, exceeds MAX_VIDEO_FRAME_ANALYSIS_BYTES, or fails to load
  def video_payload
    return { skipped: true, reason: "media_missing" } unless post.media.attached?

    media_blob = post.media.blob
    mime = media_blob&.content_type.to_s
    unless mime.start_with?("video/")
      return { skipped: true, reason: "not_video", content_type: mime }
    end

    size = media_blob.byte_size.to_i
    if size > MAX_VIDEO_FRAME_ANALYSIS_BYTES
      return {
        skipped: true,
        reason: "video_too_large_for_frame_analysis",
        content_type: mime,
        byte_size: size,
        max_bytes: MAX_VIDEO_FRAME_ANALYSIS_BYTES
      }
    end

    {
      skipped: false,
      video_bytes: media_blob.download,
      content_type: mime,
      reference_id: "post_media_#{post.id}"
    }
  rescue StandardError => e
    { skipped: true, reason: "video_load_error", error: e.message.to_s }
  end
  197. private
  # Describes media that is referenced by URL instead of being embedded as bytes.
  #
  # Every value is coerced (`to_s` / `to_i`), so nil inputs become "" or 0.
  #
  # @return [Hash] with :type, :content_type, :url, :source and :byte_size
  def url_media_payload(type:, content_type:, url:, byte_size:)
    payload = {}
    payload[:type] = type.to_s
    payload[:content_type] = content_type.to_s
    payload[:url] = url.to_s
    payload[:source] = "source_media_url"
    payload[:byte_size] = byte_size.to_i
    payload
  end
  # Builds the "no usable media" payload, recording why the media was rejected.
  #
  # Optional details that are nil (or, for :content_type and :error, blank) are
  # left out of the result.
  #
  # @return [Hash] always contains `type: "none"` and :reason
  def none_media_payload(reason:, content_type: nil, byte_size: nil, max_bytes: nil, min_bytes: nil, error: nil)
    details = [
      [ :type, "none" ],
      [ :reason, reason.to_s ],
      [ :content_type, content_type.to_s.presence ],
      [ :byte_size, byte_size ],
      [ :max_bytes, max_bytes ],
      [ :min_bytes, min_bytes ],
      [ :error, error.to_s.presence ]
    ]
    details.reject { |_key, value| value.nil? }.to_h
  end
  # Downloads a downscaled variant of the post's media, bounded by
  # IMAGE_RESIZE_MAX_DIMENSION on both axes.
  #
  # Resizing is best effort: if the variant cannot be produced or downloaded,
  # the original blob is downloaded instead.
  #
  # @param blob [#download] fallback source for the original bytes
  # @return the downloaded bytes
  def resize_image_blob(blob:)
    bounds = [ IMAGE_RESIZE_MAX_DIMENSION, IMAGE_RESIZE_MAX_DIMENSION ]
    post.media.variant(resize_to_limit: bounds).processed.download
  rescue StandardError
    blob.download
  end
  # Checks that the leading bytes match the magic number of the declared content type.
  #
  # Recognised families: JPEG, PNG, GIF, WebP, HEIC/HEIF, and for `video/*`
  # MP4/QuickTime (`ftyp` box) and WebM (EBML header). Content types without a
  # known signature are accepted as long as the bytes are not blank.
  #
  # @param content_type [String, nil] declared MIME type (matched case-insensitively)
  # @param bytes [String, nil] media bytes in any encoding
  # @return [Boolean]
  def valid_signature?(content_type:, bytes:)
    # Compare as raw bytes: the signatures below are binary literals, and matching
    # them against a non-binary string that holds high bytes would raise
    # Encoding::CompatibilityError instead of answering the question.
    data = bytes.to_s.b
    return false if data.blank?

    type = content_type.to_s.downcase
    return data.start_with?("\xFF\xD8".b) if type.include?("jpeg")
    return data.start_with?("\x89PNG\r\n\x1A\n".b) if type.include?("png")
    return data.start_with?("GIF87a".b, "GIF89a".b) if type.include?("gif")

    if type.include?("webp")
      # RIFF container: "RIFF" <4-byte size> "WEBP"
      return data.bytesize >= 12 && data.byteslice(0, 4) == "RIFF" && data.byteslice(8, 4) == "WEBP"
    end

    if type.include?("heic") || type.include?("heif")
      return data.bytesize >= 12 && data.byteslice(4, 4) == "ftyp"
    end

    if type.start_with?("video/")
      if type.include?("mp4") || type.include?("quicktime")
        return data.bytesize >= 12 && data.byteslice(4, 4) == "ftyp"
      end
      return data.bytesize >= 4 && data.byteslice(0, 4) == "\x1A\x45\xDF\xA3".b if type.include?("webm")
    end

    true
  end
  # Reduces a media URL to a canonical form.
  #
  # HTTP(S) URLs are rebuilt from scheme, host and path only, so port, query
  # string and fragment are dropped. Other schemes, and values that cannot be
  # parsed as a URI, are returned stripped but otherwise untouched.
  #
  # @param raw [#to_s, nil]
  # @return [String, nil] nil when the input is blank
  def normalize_url(raw)
    cleaned = raw.to_s.strip
    return nil if cleaned.blank?

    parsed = URI.parse(cleaned)
    case parsed
    when URI::HTTP # URI::HTTPS is a subclass, so this covers both schemes
      "#{parsed.scheme}://#{parsed.host}#{parsed.path}"
    else
      cleaned
    end
  rescue StandardError
    cleaned
  end
  257. end
  258. end

app/services/ai/post_analysis_pipeline_state.rb

0.0% lines covered

100.0% branches covered

239 relevant lines. 0 lines covered and 239 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "securerandom"
  module Ai
    # Reads and writes the state of a post's AI analysis pipeline, which is stored
    # under `post.metadata["ai_pipeline"]`.
    #
    # A pipeline run is identified by a run id. Every reader and writer takes the
    # run id and treats a mismatch with the stored run as "no pipeline": readers
    # return nil / [] / false and writers do nothing. All writes happen inside
    # `post.with_lock`.
    class PostAnalysisPipelineState
      STEP_KEYS = %w[visual face ocr video metadata].freeze
      TERMINAL_STATUSES = %w[succeeded failed skipped].freeze
      PIPELINE_TERMINAL_STATUSES = %w[completed failed].freeze
      DEFAULT_TASK_FLAGS = {
        "analyze_visual" => true,
        "analyze_faces" => true,
        "run_ocr" => true,
        "run_video" => true,
        "run_metadata" => true,
        "generate_comments" => true,
        "enforce_comment_evidence_policy" => true,
        "retry_on_incomplete_profile" => true
      }.freeze
      # Task flags that switch a pipeline step on, mapped to that step's key.
      TASK_TO_STEP = {
        "analyze_visual" => "visual",
        "analyze_faces" => "face",
        "run_ocr" => "ocr",
        "run_video" => "video",
        "run_metadata" => "metadata"
      }.freeze

      attr_reader :post

      # @param post the record whose `metadata` holds the pipeline state
      def initialize(post:)
        @post = post
      end

      # Starts a new run: replaces any stored pipeline, clears a previous
      # "ai_pipeline_failure" entry and marks the post as running.
      #
      # @param task_flags [Hash] overrides for DEFAULT_TASK_FLAGS (unknown keys are ignored)
      # @param source_job [#to_s, nil] recorded as "source_job" when present
      # @param run_id [String] identifier of the new run
      # @return [String] the run id
      def start!(task_flags: {}, source_job: nil, run_id: SecureRandom.uuid)
        normalized_flags = normalize_task_flags(task_flags)
        required_steps = required_steps_for(flags: normalized_flags)
        now = iso_timestamp

        post.with_lock do
          metadata = metadata_for(post)
          metadata.delete("ai_pipeline_failure")
          metadata["ai_pipeline"] = {
            "run_id" => run_id,
            "status" => "running",
            "source_job" => source_job.to_s.presence,
            "created_at" => now,
            "updated_at" => now,
            "task_flags" => normalized_flags,
            "required_steps" => required_steps,
            "steps" => build_initial_steps(required_steps: required_steps, at: now)
          }.compact
          post.update!(
            ai_status: "running",
            analyzed_at: nil,
            metadata: metadata
          )
        end

        run_id
      end

      # @return [Hash, nil] the stored pipeline, only if it belongs to `run_id`
      def pipeline_for(run_id:)
        pipeline = current_pipeline
        return nil unless pipeline.is_a?(Hash)
        return nil unless pipeline["run_id"].to_s == run_id.to_s

        pipeline
      end

      # @return the stored pipeline (a copy), whatever run it belongs to
      def current_pipeline
        metadata_for(post)["ai_pipeline"]
      end

      # @return [Array<String>] step keys required by the run ([] for an unknown run)
      def required_steps(run_id:)
        pipeline = pipeline_for(run_id: run_id)
        return [] unless pipeline.is_a?(Hash)

        Array(pipeline["required_steps"]).map(&:to_s)
      end

      # @return the stored row for `step`, or nil
      def step_state(run_id:, step:)
        pipeline = pipeline_for(run_id: run_id)
        return nil unless pipeline.is_a?(Hash)

        pipeline.dig("steps", step.to_s)
      end

      # @return [Boolean] whether the step's status is one of TERMINAL_STATUSES
      def step_terminal?(run_id:, step:)
        TERMINAL_STATUSES.include?(step_state(run_id: run_id, step: step).to_h["status"].to_s)
      end

      # @return [Boolean] whether the run's status is one of PIPELINE_TERMINAL_STATUSES
      def pipeline_terminal?(run_id:)
        PIPELINE_TERMINAL_STATUSES.include?(pipeline_for(run_id: run_id).to_h["status"].to_s)
      end

      # Marks a step as running; this is the only transition that counts an attempt.
      def mark_step_running!(run_id:, step:, queue_name:, active_job_id:)
        mark_step!(
          run_id: run_id,
          step: step,
          status: "running",
          queue_name: queue_name,
          active_job_id: active_job_id,
          started_at: iso_timestamp
        )
      end

      # Marks a step as queued without touching its start time.
      def mark_step_queued!(run_id:, step:, queue_name:, active_job_id:, result: nil)
        mark_step!(
          run_id: run_id,
          step: step,
          status: "queued",
          queue_name: queue_name,
          active_job_id: active_job_id,
          result: result,
          started_at: nil
        )
      end

      # Records the outcome of a step and stamps "finished_at".
      #
      # A status outside TERMINAL_STATUSES / "queued" / "running" / "pending" is
      # stored as "failed".
      def mark_step_completed!(run_id:, step:, status:, result: nil, error: nil)
        accepted = TERMINAL_STATUSES + [ "queued", "running", "pending" ]
        normalized_status = status.to_s
        normalized_status = "failed" unless accepted.include?(normalized_status)

        mark_step!(
          run_id: run_id,
          step: step,
          status: normalized_status,
          result: result,
          error: error,
          finished_at: iso_timestamp
        )
      end

      # @return [Boolean] true when the run has required steps and all are terminal
      def all_required_steps_terminal?(run_id:)
        required = required_steps(run_id: run_id)
        return false if required.empty?

        required.all? { |step| step_terminal?(run_id: run_id, step: step) }
      end

      # Like #all_required_steps_terminal? but ignores the "metadata" step, and is
      # true when nothing else is required.
      def core_steps_terminal?(run_id:)
        core = required_steps(run_id: run_id) - [ "metadata" ]
        return true if core.empty?

        core.all? { |step| step_terminal?(run_id: run_id, step: step) }
      end

      # Stores the final pipeline status. "updated_at" and "finished_at" receive
      # the same timestamp.
      #
      # @return [Hash, nil] the updated pipeline, nil when `run_id` is not the stored run
      def mark_pipeline_finished!(run_id:, status:, details: nil)
        with_pipeline_update(run_id: run_id) do |pipeline, _metadata|
          now = iso_timestamp
          pipeline["status"] = status.to_s
          pipeline["updated_at"] = now
          pipeline["finished_at"] = now
          pipeline["details"] = details if details.present?
        end
      end

      # @return [Boolean] whether `step` is required and still has no status or "pending"
      def required_step_pending?(run_id:, step:)
        return false unless required_steps(run_id: run_id).include?(step.to_s)

        status = step_state(run_id: run_id, step: step).to_h["status"].to_s
        [ "", "pending" ].include?(status)
      end

      private

      # Merges the given attributes into the step's row. Blank optional values
      # leave the stored value untouched; `result` is stored only when it is a Hash.
      def mark_step!(run_id:, step:, status:, queue_name: nil, active_job_id: nil, result: nil, error: nil, started_at: nil, finished_at: nil)
        with_pipeline_update(run_id: run_id) do |pipeline, _metadata|
          key = step.to_s
          steps = pipeline["steps"].is_a?(Hash) ? pipeline["steps"] : {}
          row = steps[key].is_a?(Hash) ? steps[key] : {}

          # Count attempts only when a worker actually starts execution.
          attempts = row["attempts"].to_i
          attempts += 1 if status.to_s == "running"

          row["status"] = status.to_s
          row["queue_name"] = queue_name if queue_name.present?
          row["active_job_id"] = active_job_id if active_job_id.present?
          row["started_at"] = started_at if started_at.present?
          row["finished_at"] = finished_at if finished_at.present?
          row["attempts"] = attempts
          row["result"] = result if result.is_a?(Hash)
          row["error"] = error.to_s if error.present?

          steps[key] = row
          pipeline["steps"] = steps
          pipeline["updated_at"] = iso_timestamp
        end
      end

      # Yields the stored pipeline (and the full metadata copy) under the post's
      # lock, then persists whatever the block changed.
      #
      # @return [Hash, nil] the persisted pipeline; nil, without yielding or
      #   writing, when the stored pipeline does not belong to `run_id`
      def with_pipeline_update(run_id:)
        post.with_lock do
          metadata = metadata_for(post)
          pipeline = metadata["ai_pipeline"]
          # `next`, not `return`: leaving the lock's transaction block through a
          # non-local return is deprecated in Rails and its commit/rollback
          # outcome depends on configuration.
          next nil unless pipeline.is_a?(Hash) && pipeline["run_id"].to_s == run_id.to_s

          yield(pipeline, metadata)
          metadata["ai_pipeline"] = pipeline
          post.update!(metadata: metadata)
          pipeline
        end
      end

      # Applies boolean-cast overrides for known flags on top of the defaults and
      # forces "run_video" off when the post has no video attached.
      def normalize_task_flags(task_flags)
        incoming = deep_stringify(task_flags.is_a?(Hash) ? task_flags : {})
        normalized = DEFAULT_TASK_FLAGS.deep_dup
        incoming.each do |key, value|
          next unless normalized.key?(key)

          normalized[key] = cast_boolean(value)
        end
        normalized["run_video"] = false unless video_media_available?
        normalized
      end

      # @return [Array<String>] step keys whose task flag is truthy, in TASK_TO_STEP order
      def required_steps_for(flags:)
        TASK_TO_STEP.filter_map { |flag_key, step_key| step_key if cast_boolean(flags[flag_key]) }
      end

      def cast_boolean(value)
        ActiveModel::Type::Boolean.new.cast(value)
      end

      def video_media_available?
        return false unless post.media.attached?

        post.media.blob&.content_type.to_s.start_with?("video/")
      rescue StandardError
        false
      end

      # Required steps start as "pending"; every other step in STEP_KEYS is
      # recorded as "skipped" with reason "task_disabled".
      def build_initial_steps(required_steps:, at:)
        STEP_KEYS.each_with_object({}) do |step, out|
          out[step] =
            if required_steps.include?(step)
              {
                "status" => "pending",
                "attempts" => 0,
                "queue_name" => nil,
                "active_job_id" => nil,
                "started_at" => nil,
                "finished_at" => nil,
                "result" => {},
                "error" => nil,
                "created_at" => at
              }
            else
              {
                "status" => "skipped",
                "attempts" => 0,
                "result" => { "reason" => "task_disabled" },
                "created_at" => at,
                "finished_at" => at
              }
            end
        end
      end

      # @return [Hash] a deep copy of the record's metadata ({} when it is not a Hash)
      def metadata_for(record)
        value = record.metadata
        value.is_a?(Hash) ? value.deep_dup : {}
      end

      def deep_stringify(value)
        case value
        when Hash
          value.each_with_object({}) do |(key, child), out|
            out[key.to_s] = deep_stringify(child)
          end
        when Array
          value.map { |child| deep_stringify(child) }
        else
          value
        end
      end

      def iso_timestamp
        Time.current.iso8601(3)
      end
    end
  end

app/services/ai/post_analyzer.rb

0.0% lines covered

100.0% branches covered

74 relevant lines. 0 lines covered and 74 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "json"
  module Ai
    # Asks a chat-completion model to analyze a feed post payload (optionally with
    # an inline image) and returns the parsed JSON analysis together with the
    # prompt and raw response.
    class PostAnalyzer
      DEFAULT_MODEL = "mistral:7b".freeze

      # @param client [#chat_completions!, nil] defaults to Ai::LocalMicroserviceClient
      # @param model [String, nil] model name; blank falls back to DEFAULT_MODEL
      def initialize(client: nil, model: nil)
        @client = client || Ai::LocalMicroserviceClient.new
        @model = model.presence || DEFAULT_MODEL
      end

      # Runs one analysis request.
      #
      # @param post_payload the post data embedded in the prompt as pretty-printed JSON
      # @param image_data_url [String, nil] attached only when it starts with "data:image/"
      # @return [Hash] :model, :prompt, :response_text, :response_raw and :analysis.
      #   :analysis is the parsed JSON object, or
      #   `{ "parse_error" => true, "raw_text" => ... }` when the response is not a
      #   JSON object
      def analyze!(post_payload:, image_data_url: nil)
        system = system_prompt
        user = user_prompt(post_payload)

        images = []
        images << image_data_url.to_s if image_data_url.to_s.start_with?("data:image/")

        messages = [
          { role: "system", content: [ { type: "text", text: system } ] },
          { role: "user", content: build_user_content(text: user, images: images) }
        ]

        resp = @client.chat_completions!(
          model: @model,
          messages: messages,
          temperature: 0.2,
          usage_category: "report_generation",
          usage_context: { workflow: "post_analyzer" }
        )

        {
          model: @model,
          prompt: { system: system, user: user, images_count: images.length },
          response_text: resp[:content],
          response_raw: resp[:raw],
          analysis: safe_parse_json(resp[:content])
        }
      end

      private

      # Instructions and guardrails sent as the system message.
      def system_prompt
        <<~SYS.strip
          You analyze an Instagram feed post payload and optionally an image.
          Output MUST be strict JSON. No markdown.
          Constraints:
          - Do NOT guess sensitive demographics (age, gender, ethnicity, religion, nationality, native place).
          - If the payload contains explicit self-declared information, you may repeat it as evidence.
          - Decide whether we should store this post (relevant) or ignore it (irrelevant) based on tags/rules in the payload.
          - Provide only safe, non-deceptive interaction suggestions.
          - Style for generated comments: modern Gen Z voice, light slang, playful energy, and occasional emojis.
          - Keep it socially engaging and authentic without being offensive, sexual, manipulative, or overfamiliar.
          - First produce a concise, visual image_description; then base comment suggestions on that description.
        SYS
      end

      # The user message: the post payload followed by the required output schema.
      def user_prompt(post_payload)
        <<~TXT
          INPUT_POST_JSON:
          #{JSON.pretty_generate(post_payload)}
          Produce JSON with keys:
          - image_description: 1-3 sentence visual description of what is happening in the image
          - relevant: boolean
          - author_type: one of ["personal_user","friend","relative","page","unknown"]
          - topics: array of strings
          - sentiment: one of ["positive","neutral","negative","mixed","unknown"]
          - suggested_actions: array of strings from ["ignore","review","like_suggestion","comment_suggestion"]
          - recommended_next_action: one of ["ignore","review","comment_suggestion","like_suggestion"]
          - engagement_score: number 0-1
          - comment_suggestions: array of 5 short comments (friendly/contextual, Gen Z-style voice, based on image_description, may include emojis)
          - personalization_tokens: array of short contextual tokens we can safely reference
          - confidence: number 0-1
          - evidence: short string
        TXT
      end

      # Builds the multi-part user content: the text first, then one
      # "image_url" part per image.
      def build_user_content(text:, images:)
        image_parts = Array(images).map { |url| { type: "image_url", image_url: { url: url } } }
        [ { type: "text", text: text } ] + image_parts
      end

      # Parses the model output. Anything that is not a JSON object (invalid JSON,
      # or a valid array/number/string/null) is reported as a parse error, because
      # the prompt asks for an object with named keys.
      def safe_parse_json(text)
        parsed = JSON.parse(text.to_s)
        return parsed if parsed.is_a?(Hash)

        { "parse_error" => true, "raw_text" => text.to_s }
      rescue StandardError
        { "parse_error" => true, "raw_text" => text.to_s }
      end
    end
  end

app/services/ai/post_comment_generation_service.rb

0.0% lines covered

100.0% branches covered

382 relevant lines. 0 lines covered and 382 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. module Ai
  2. class PostCommentGenerationService
  3. REQUIRED_SIGNAL_KEYS = %w[history face text_context].freeze
  4. MAX_SUGGESTIONS = 8
  5. def initialize(
  6. account:,
  7. profile:,
  8. post:,
  9. preparation_summary: nil,
  10. profile_preparation_service: nil,
  11. comment_generator: nil,
  12. enforce_required_evidence: true
  13. )
  14. @account = account
  15. @profile = profile
  16. @post = post
  17. @preparation_summary = preparation_summary
  18. @profile_preparation_service = profile_preparation_service
  19. @comment_generator = comment_generator
  20. @enforce_required_evidence = ActiveModel::Type::Boolean.new.cast(enforce_required_evidence)
  21. end
  22. def run!
  23. return skipped_result(reason_code: "post_missing") unless post&.persisted?
  24. analysis = normalized_hash(post.analysis)
  25. metadata = normalized_hash(post.metadata)
  26. preparation = prepared_history_summary
  27. face_count = extract_face_count(analysis: analysis, metadata: metadata)
  28. ocr_text = extract_ocr_text(analysis: analysis, metadata: metadata)
  29. transcript = extract_transcript(analysis: analysis, metadata: metadata)
  30. text_context = extract_text_context(analysis: analysis, metadata: metadata)
  31. history_ready = ActiveModel::Type::Boolean.new.cast(preparation["ready_for_comment_generation"])
  32. missing = []
  33. missing << "history" unless history_ready
  34. missing << "face" unless face_count.positive?
  35. missing << "text_context" if text_context.blank?
  36. if missing.any? && enforce_required_evidence?
  37. return persist_blocked!(
  38. analysis: analysis,
  39. metadata: metadata,
  40. preparation: preparation,
  41. missing_signals: missing,
  42. reason_code: "missing_required_evidence"
  43. )
  44. end
  45. topics = merged_topics(analysis: analysis, metadata: metadata)
  46. image_description = build_image_description(
  47. analysis: analysis,
  48. metadata: metadata,
  49. topics: topics,
  50. transcript: transcript
  51. )
  52. if image_description.blank?
  53. return persist_blocked!(
  54. analysis: analysis,
  55. metadata: metadata,
  56. preparation: preparation,
  57. missing_signals: [ "visual_context" ],
  58. reason_code: "missing_visual_context"
  59. )
  60. end
  61. result = comment_generator.generate!(
  62. post_payload: post_payload,
  63. image_description: image_description,
  64. topics: topics,
  65. author_type: inferred_author_type,
  66. historical_comments: historical_comments,
  67. historical_context: historical_context,
  68. profile_preparation: preparation,
  69. verified_profile_history: verified_profile_history,
  70. conversational_voice: conversational_voice,
  71. cv_ocr_evidence: build_comment_context_payload(
  72. analysis: analysis,
  73. metadata: metadata,
  74. topics: topics,
  75. transcript: transcript,
  76. ocr_text: ocr_text
  77. )
  78. )
  79. suggestions = normalize_suggestions(result[:comment_suggestions])
  80. if suggestions.empty?
  81. return persist_blocked!(
  82. analysis: analysis,
  83. metadata: metadata,
  84. preparation: preparation,
  85. missing_signals: [ "generation_output" ],
  86. reason_code: "comment_generation_empty",
  87. error_message: result[:error_message].to_s.presence || "Comment generation produced no valid suggestions."
  88. )
  89. end
  90. analysis["comment_suggestions"] = suggestions
  91. analysis["comment_generation_status"] = result[:status].to_s.presence || "ok"
  92. analysis["comment_generation_source"] = result[:source].to_s.presence || "ollama"
  93. analysis["comment_generation_fallback_used"] = ActiveModel::Type::Boolean.new.cast(result[:fallback_used])
  94. analysis["comment_generation_error"] = result[:error_message].to_s.presence
  95. metadata["comment_generation_policy"] = {
  96. "status" => missing.any? ? "enabled_with_missing_required_evidence" : "enabled",
  97. "required_signals" => REQUIRED_SIGNAL_KEYS,
  98. "missing_signals" => missing.any? ? missing : [],
  99. "enforce_required_evidence" => enforce_required_evidence?,
  100. "history_ready" => history_ready,
  101. "history_reason_code" => preparation["reason_code"].to_s.presence,
  102. "face_count" => face_count,
  103. "text_context_present" => text_context.present?,
  104. "ocr_text_present" => ocr_text.present?,
  105. "transcript_present" => transcript.present?,
  106. "updated_at" => Time.current.iso8601(3)
  107. }.compact
  108. post.update!(analysis: analysis, metadata: metadata)
  109. {
  110. blocked: false,
  111. status: analysis["comment_generation_status"],
  112. source: analysis["comment_generation_source"],
  113. suggestions_count: suggestions.length,
  114. reason_code: nil,
  115. history_reason_code: preparation["reason_code"].to_s.presence
  116. }
  117. rescue StandardError => e
  118. analysis = normalized_hash(post&.analysis)
  119. metadata = normalized_hash(post&.metadata)
  120. persist_blocked!(
  121. analysis: analysis,
  122. metadata: metadata,
  123. preparation: prepared_history_summary,
  124. missing_signals: [ "generation_error" ],
  125. reason_code: "comment_generation_error",
  126. error_message: "#{e.class}: #{e.message}"
  127. )
  128. end
  129. private
  130. attr_reader :account, :profile, :post
  131. def prepared_history_summary
  132. return @prepared_history_summary if defined?(@prepared_history_summary)
  133. @prepared_history_summary =
  134. if @preparation_summary.is_a?(Hash)
  135. @preparation_summary
  136. else
  137. service =
  138. @profile_preparation_service ||
  139. Ai::ProfileCommentPreparationService.new(
  140. account: account,
  141. profile: profile,
  142. analyze_missing_posts: false
  143. )
  144. service.prepare!(force: false)
  145. end
  146. rescue StandardError => e
  147. {
  148. "ready_for_comment_generation" => false,
  149. "reason_code" => "profile_preparation_failed",
  150. "reason" => e.message.to_s,
  151. "error_class" => e.class.name
  152. }
  153. end
  154. def comment_generator
  155. @comment_generator ||=
  156. Ai::LocalEngagementCommentGenerator.new(
  157. ollama_client: Ai::OllamaClient.new,
  158. model: preferred_model
  159. )
  160. end
  161. def preferred_model
  162. row = profile&.latest_analysis&.ai_provider_setting
  163. row&.config_value("ollama_model").to_s.presence || "mistral:7b"
  164. rescue StandardError
  165. "mistral:7b"
  166. end
  167. def post_payload
  168. builder = Ai::PostAnalysisContextBuilder.new(profile: profile, post: post)
  169. payload = builder.payload
  170. payload[:rules] = (payload[:rules].is_a?(Hash) ? payload[:rules] : {}).merge(
  171. require_history_context: true,
  172. require_face_signal: true,
  173. require_ocr_signal: true,
  174. require_text_context: true
  175. )
  176. payload
  177. rescue StandardError
  178. {}
  179. end
  180. def inferred_author_type
  181. tags = profile.profile_tags.pluck(:name).map(&:to_s)
  182. return "relative" if tags.include?("relative")
  183. return "friend" if tags.include?("friend") || tags.include?("female_friend") || tags.include?("male_friend")
  184. return "page" if tags.include?("page")
  185. return "personal_user" if tags.include?("personal_user")
  186. "unknown"
  187. rescue StandardError
  188. "unknown"
  189. end
  190. def historical_comments
  191. rows = profile.instagram_profile_events.where(kind: "post_comment_sent").order(detected_at: :desc, id: :desc).limit(20).pluck(:metadata)
  192. out = rows.filter_map do |meta|
  193. row = meta.is_a?(Hash) ? meta : {}
  194. row["comment_text"].to_s.strip.presence
  195. end
  196. out.uniq.first(12)
  197. rescue StandardError
  198. []
  199. end
  200. def historical_context
  201. profile.history_narrative_text(max_chunks: 4).to_s
  202. rescue StandardError
  203. ""
  204. end
  205. def verified_profile_history
  206. rows = profile.instagram_profile_posts
  207. .where(ai_status: "analyzed")
  208. .where.not(id: post.id)
  209. .includes(:instagram_post_faces)
  210. .recent_first
  211. .limit(8)
  212. rows.map do |row|
  213. analysis = normalized_hash(row.analysis)
  214. {
  215. shortcode: row.shortcode.to_s,
  216. taken_at: row.taken_at&.iso8601,
  217. topics: normalized_topics(analysis["topics"]).first(8),
  218. objects: normalized_topics(analysis["objects"]).first(8),
  219. hashtags: normalized_topics(analysis["hashtags"]).first(8),
  220. mentions: normalized_topics(analysis["mentions"]).first(8),
  221. face_count: row.instagram_post_faces.size,
  222. image_description: analysis["image_description"].to_s.byteslice(0, 220)
  223. }
  224. end
  225. rescue StandardError
  226. []
  227. end
  228. def conversational_voice
  229. summary = profile.instagram_profile_behavior_profile&.behavioral_summary
  230. summary = {} unless summary.is_a?(Hash)
  231. {
  232. profile_tags: profile.profile_tags.pluck(:name).map(&:to_s).uniq.first(10),
  233. recurring_topics: hash_keys(summary["topic_clusters"]),
  234. recurring_hashtags: hash_keys(summary["top_hashtags"]),
  235. frequent_people_labels: frequent_people_labels(summary["frequent_secondary_persons"])
  236. }
  237. rescue StandardError
  238. {}
  239. end
  240. def hash_keys(value)
  241. return [] unless value.is_a?(Hash)
  242. value.keys.map(&:to_s).map(&:strip).reject(&:blank?).first(10)
  243. end
  244. def frequent_people_labels(value)
  245. Array(value).filter_map do |row|
  246. next unless row.is_a?(Hash)
  247. row["label"].to_s.presence || row[:label].to_s.presence
  248. end.map(&:to_s).map(&:strip).reject(&:blank?).uniq.first(8)
  249. end
  250. def normalized_topics(value)
  251. Array(value).map(&:to_s).map(&:strip).reject(&:blank?).uniq
  252. end
  253. def merged_topics(analysis:, metadata:)
  254. normalized_topics(
  255. normalized_topics(analysis["topics"]) +
  256. normalized_topics(analysis["video_topics"]) +
  257. normalized_topics(analysis["video_objects"]) +
  258. normalized_topics(analysis["video_hashtags"]) +
  259. normalized_topics(metadata.dig("video_processing", "topics")) +
  260. normalized_topics(metadata.dig("video_processing", "objects")) +
  261. normalized_topics(metadata.dig("video_processing", "hashtags"))
  262. )
  263. end
  264. def normalize_suggestions(value)
  265. Array(value).filter_map do |raw|
  266. text = raw.to_s.gsub(/\s+/, " ").strip
  267. next if text.blank?
  268. text.byteslice(0, 140)
  269. end.uniq.first(MAX_SUGGESTIONS)
  270. end
  271. def extract_face_count(analysis:, metadata:)
  272. summary_face_count = analysis.dig("face_summary", "face_count").to_i
  273. return summary_face_count if summary_face_count.positive?
  274. metadata.dig("face_recognition", "face_count").to_i
  275. end
  276. def extract_ocr_text(analysis:, metadata:)
  277. analysis["ocr_text"].to_s.strip.presence ||
  278. analysis["video_ocr_text"].to_s.strip.presence ||
  279. metadata.dig("ocr_analysis", "ocr_text").to_s.strip.presence ||
  280. metadata.dig("video_processing", "ocr_text").to_s.strip.presence
  281. end
  282. def extract_transcript(analysis:, metadata:)
  283. analysis["transcript"].to_s.strip.presence ||
  284. metadata.dig("video_processing", "transcript").to_s.strip.presence
  285. end
  286. def extract_text_context(analysis:, metadata:)
  287. [ extract_ocr_text(analysis: analysis, metadata: metadata), extract_transcript(analysis: analysis, metadata: metadata) ]
  288. .map(&:to_s)
  289. .map(&:strip)
  290. .reject(&:blank?)
  291. .join("\n")
  292. .presence
  293. end
  294. def build_image_description(analysis:, metadata:, topics:, transcript:)
  295. description = analysis["image_description"].to_s.strip
  296. if description.blank? && topics.any?
  297. description = "Detected visual signals: #{topics.first(6).join(', ')}."
  298. end
  299. video_summary = analysis["video_context_summary"].to_s.strip.presence || metadata.dig("video_processing", "context_summary").to_s.strip.presence
  300. if description.present? && video_summary.present?
  301. description = "#{description} #{video_summary}".strip
  302. elsif description.blank? && video_summary.present?
  303. description = video_summary
  304. end
  305. if transcript.to_s.present?
  306. transcript_excerpt = transcript.to_s.gsub(/\s+/, " ").strip.byteslice(0, 220)
  307. snippet = "Audio transcript: #{transcript_excerpt}."
  308. description = [ description, snippet ].compact.join(" ").strip
  309. end
  310. description.presence
  311. end
  312. def build_comment_context_payload(analysis:, metadata:, topics:, transcript:, ocr_text:)
  313. {
  314. source: "post_analysis",
  315. media_type: analysis["video_semantic_route"].to_s.presence || metadata.dig("video_processing", "semantic_route").to_s.presence || "image",
  316. objects: topics.first(20),
  317. hashtags: normalized_topics(analysis["hashtags"]).first(20),
  318. mentions: normalized_topics(analysis["mentions"]).first(20),
  319. profile_handles: normalized_topics(analysis["video_profile_handles"]).first(20),
  320. scenes: Array(analysis["video_scenes"]).select { |row| row.is_a?(Hash) }.first(20),
  321. ocr_text: ocr_text.to_s.presence,
  322. transcript: transcript.to_s.presence
  323. }.compact
  324. end
  325. def persist_blocked!(analysis:, metadata:, preparation:, missing_signals:, reason_code:, error_message: nil)
  326. analysis = normalized_hash(analysis)
  327. metadata = normalized_hash(metadata)
  328. missing = Array(missing_signals).map(&:to_s).map(&:strip).reject(&:blank?).uniq
  329. reason = blocked_reason(preparation: preparation, missing_signals: missing, fallback_reason_code: reason_code)
  330. analysis["comment_suggestions"] = []
  331. analysis["comment_generation_status"] = "blocked_missing_required_evidence"
  332. analysis["comment_generation_source"] = "policy"
  333. analysis["comment_generation_fallback_used"] = false
  334. analysis["comment_generation_error"] = error_message.to_s.presence || reason
  335. metadata["comment_generation_policy"] = {
  336. "status" => "blocked",
  337. "required_signals" => REQUIRED_SIGNAL_KEYS,
  338. "missing_signals" => missing,
  339. "enforce_required_evidence" => enforce_required_evidence?,
  340. "history_ready" => ActiveModel::Type::Boolean.new.cast(preparation["ready_for_comment_generation"]),
  341. "history_reason_code" => preparation["reason_code"].to_s.presence,
  342. "history_reason" => preparation["reason"].to_s.presence,
  343. "blocked_reason_code" => reason_code.to_s.presence || "missing_required_evidence",
  344. "blocked_reason" => reason,
  345. "updated_at" => Time.current.iso8601(3)
  346. }.compact
  347. post.update!(analysis: analysis, metadata: metadata) if post&.persisted?
  348. {
  349. blocked: true,
  350. status: analysis["comment_generation_status"],
  351. source: analysis["comment_generation_source"],
  352. suggestions_count: 0,
  353. reason_code: reason_code.to_s.presence || "missing_required_evidence",
  354. history_reason_code: preparation["reason_code"].to_s.presence
  355. }
  356. end
  # Builds the human-readable reason for a blocked comment generation.
  #
  # One fragment is produced per missing signal ("history", "face", "text_context"), in that
  # order; when none of them is missing the stringified fallback reason code is used instead.
  # Fragments are joined with ", ".
  def blocked_reason(preparation:, missing_signals:, fallback_reason_code:)
    fragments = [
      ("history_not_ready(#{preparation['reason_code']})" if missing_signals.include?("history")),
      ("face_signal_missing" if missing_signals.include?("face")),
      ("text_context_missing(ocr_or_transcript)" if missing_signals.include?("text_context"))
    ].compact
    fragments = [ fallback_reason_code.to_s ] if fragments.empty?
    fragments.join(", ")
  end
  # Result hash for a run that was skipped by policy; it has the same keys as the
  # hash returned for a blocked run, with status "skipped" and no history reason code.
  def skipped_result(reason_code:)
    outcome = { blocked: true, status: "skipped", source: "policy", suggestions_count: 0 }
    outcome[:reason_code] = reason_code.to_s
    outcome[:history_reason_code] = nil
    outcome
  end
  # Returns a deep copy of +value+ when it is a Hash, otherwise a new empty Hash.
  def normalized_hash(value)
    return {} unless value.is_a?(Hash)

    value.deep_dup
  end
  # Reader for the @enforce_required_evidence flag; the value is returned as stored.
  def enforce_required_evidence?
    @enforce_required_evidence
  end
  381. end
  382. end

app/services/ai/post_ocr_service.rb

0.0% lines covered

100.0% branches covered

79 relevant lines. 0 lines covered and 79 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module Ai
    # Runs text detection on raw image bytes through the injected client and turns the
    # "textAnnotations" rows of the response into normalized OCR blocks.
    class PostOcrService
      # client - object responding to analyze_image_bytes!(bytes, features:, usage_category:, usage_context:).
      def initialize(client: Ai::LocalMicroserviceClient.new)
        @client = client
      end

      # Extracts OCR text from +image_bytes+.
      #
      # Returns a Hash with :skipped, :ocr_text, :ocr_blocks and :metadata. Blank input yields a
      # skipped result with reason "image_bytes_missing"; any StandardError raised while calling
      # the client or reading its response yields a skipped result with reason "ocr_error" plus
      # the error class and message. At most 80 blocks are kept.
      def extract_from_image_bytes(image_bytes:, usage_context: {})
        return skipped_result(reason: "image_bytes_missing") if image_bytes.blank?

        response = @client.analyze_image_bytes!(
          image_bytes,
          features: [ { type: "TEXT_DETECTION" } ],
          usage_category: "ocr",
          usage_context: usage_context
        )
        blocks = Array(response["textAnnotations"]).filter_map { |row| build_block(row) }.first(80)
        texts = blocks.map { |block| block["text"] }

        {
          skipped: false,
          ocr_text: texts.uniq.join("\n").presence,
          ocr_blocks: blocks,
          metadata: { source: "local_microservice_ocr", block_count: blocks.length }
        }
      rescue StandardError => e
        failure = skipped_result(reason: "ocr_error")
        failure[:metadata][:error_class] = e.class.name
        failure[:metadata][:error_message] = e.message.to_s
        failure
      end

      private

      # Shape shared by every result in which no OCR output is available.
      def skipped_result(reason:)
        {
          skipped: true,
          ocr_text: nil,
          ocr_blocks: [],
          metadata: { source: "local_microservice_ocr", reason: reason }
        }
      end

      # Converts one annotation row into a block Hash; returns nil for rows that are
      # not Hashes or whose "description" is blank after stripping.
      def build_block(row)
        return unless row.is_a?(Hash)

        text = row["description"].to_s.strip
        return if text.blank?

        {
          "text" => text,
          "confidence" => row["confidence"].to_f,
          "bbox" => normalize_bbox(row.dig("boundingPoly", "vertices")),
          "source" => "ocr"
        }
      end

      # Reduces a vertex list to its min/max corners as floats. Vertices that are not
      # Hashes or lack "x" or "y" are ignored; no usable vertex gives {}.
      def normalize_bbox(vertices)
        points = Array(vertices).filter_map do |vertex|
          next unless vertex.is_a?(Hash)

          x, y = vertex.values_at("x", "y")
          next if x.nil? || y.nil?

          [ x.to_f, y.to_f ]
        end
        return {} if points.empty?

        xs, ys = points.transpose
        { "x1" => xs.min, "y1" => ys.min, "x2" => xs.max, "y2" => ys.max }
      end
    end
  end

app/services/ai/profile_analyzer.rb

0.0% lines covered

100.0% branches covered

77 relevant lines. 0 lines covered and 77 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "json"
  module Ai
    # Sends a profile payload (plus optional image URLs) to a chat-completion client and
    # returns the model's JSON report together with the prompt that produced it.
    class ProfileAnalyzer
      DEFAULT_MODEL = "mistral:7b".freeze

      # client - object responding to chat_completions!; defaults to Ai::LocalMicroserviceClient.new.
      # model  - model name; DEFAULT_MODEL is used when blank.
      def initialize(client: nil, model: nil)
        @client = client || Ai::LocalMicroserviceClient.new
        @model = model.presence || DEFAULT_MODEL
      end

      # Builds the system/user prompts, calls the client and parses the reply.
      #
      # profile_payload - data serialized into the user prompt with JSON.pretty_generate.
      # images          - image URLs (nil, a single value or an Array); blank entries are not sent.
      #
      # Returns a Hash with :model, :prompt, :response_text, :response_raw and :analysis.
      # :analysis is the parsed JSON, or { "parse_error" => true, "raw_text" => ... } when
      # the reply cannot be parsed. Errors raised by the client are not rescued here.
      def analyze!(profile_payload:, images: [])
        # Normalize once so a nil +images+ is counted as zero instead of raising on #length.
        image_list = Array(images)

        system = <<~SYS.strip
          You analyze Instagram profile data and produce a compact JSON report that can be used to draft friendly, respectful messages.
          Safety/constraints:
          - For demographics (age/gender/location), provide cautious estimates only when there is supporting evidence.
          - Use a modern, socially natural Gen Z-style voice for message/comment suggestions:
          light slang, playful phrasing, mild humor, and selective emojis.
          - Keep tone authentic and kind; avoid sexual content or manipulative language.
          - Output MUST be strict JSON (no markdown, no commentary).
        SYS

        user_text = <<~TXT
          INPUT_PAYLOAD_JSON:
          #{JSON.pretty_generate(profile_payload)}
          Produce JSON with keys:
          - summary: short 3-6 sentence summary of interests + tone + interaction style
          - languages: array of {language, confidence, evidence}
          - likes: array of strings (topics/content likely liked)
          - dislikes: array of strings (topics/content likely avoided)
          - intent_labels: array of strings from ["friendship","networking","business","flirting","unknown"]
          - conversation_hooks: array of {hook, evidence}
          - personalization_tokens: array of safe, non-sensitive details we can mention
          - no_go_zones: array of topics/styles to avoid
          - writing_style: {tone, formality, emoji_usage, slang_level, evidence}
          - response_style_prediction: one of ["short","medium","long","unknown"]
          - engagement_probability: number 0-1
          - recommended_next_action: one of ["dm","comment","wait","ignore","review"]
          - demographic_estimates: {age, age_confidence, gender, gender_confidence, location, location_confidence, evidence}
          - self_declared: {age, gender, location, pronouns, other}
          - suggested_dm_openers: 5 short openers in friendly Gen Z-style voice (light slang/humor/emojis when natural)
          - suggested_comment_templates: 5 short comment templates in the same voice
          - confidence_notes: short string describing what was/wasn't available
          - why_not_confident: short string listing missing signals that reduced confidence
        TXT

        messages = [
          { role: "system", content: [ { type: "text", text: system } ] },
          { role: "user", content: build_user_content(text: user_text, images: image_list) }
        ]

        resp = @client.chat_completions!(
          model: @model,
          messages: messages,
          temperature: 0.2,
          usage_category: "report_generation",
          usage_context: { workflow: "profile_analyzer" }
        )

        {
          model: @model,
          prompt: { system: system, user: user_text, images_count: image_list.length },
          response_text: resp[:content],
          response_raw: resp[:raw],
          analysis: safe_parse_json(resp[:content])
        }
      end

      private

      # Content parts for the user message: the text first, then one image_url part
      # per non-blank (stripped) image URL.
      def build_user_content(text:, images:)
        out = [ { type: "text", text: text } ]
        Array(images).each do |img|
          url = img.to_s.strip
          next if url.blank?

          out << { type: "image_url", image_url: { url: url } }
        end
        out
      end

      # Parses +text+ as JSON; on any StandardError returns a marker Hash carrying the raw text.
      def safe_parse_json(text)
        JSON.parse(text.to_s)
      rescue StandardError
        { "parse_error" => true, "raw_text" => text.to_s }
      end
    end
  end

app/services/ai/profile_auto_tagger.rb

0.0% lines covered

100.0% branches covered

45 relevant lines. 0 lines covered and 45 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module Ai
    # Derives profile tags from a post analysis hash and assigns them to the profile.
    class ProfileAutoTagger
      TAG_KEYS = %w[personal_user friend female_friend male_friend relative page excluded automatic_reply].freeze

      class << self
        # Merges the tags inferred from +analysis+ with the profile's current tag names and
        # reassigns the profile's tags from that merged list.
        #
        # Returns nil when +profile+ is missing, +analysis+ is not a Hash, nothing was inferred,
        # or any StandardError is raised; otherwise the result of profile.save!.
        #
        # NOTE(review): only names listed in TAG_KEYS survive the reassignment, so an existing
        # tag outside TAG_KEYS is removed from the profile here - confirm this is intended.
        def sync_from_post_analysis!(profile:, analysis:)
          return unless profile && analysis.is_a?(Hash)

          inferred = infer_tags(profile: profile, analysis: analysis)
          return if inferred.empty?

          merged_names = profile.profile_tags.pluck(:name) | inferred
          allowed_names = merged_names.map(&:to_s).select { |tag_name| TAG_KEYS.include?(tag_name) }
          profile.profile_tags = allowed_names.map { |tag_name| ProfileTag.find_or_create_by!(name: tag_name) }
          profile.save!
        rescue StandardError
          nil
        end

        private

        # Tag names suggested by the analysis:
        # - the "author_type" itself when it is page, relative, friend or personal_user;
        # - "excluded" when "relevant" is false with confidence >= 0.6;
        # - "automatic_reply" when "relevant" is true with confidence >= 0.65 and
        #   profile.can_message is true.
        def infer_tags(profile:, analysis:)
          author_type = analysis["author_type"].to_s
          relevant = analysis["relevant"]
          confidence = analysis["confidence"].to_f

          inferred = []
          inferred << author_type if %w[page relative friend personal_user].include?(author_type)
          inferred << "excluded" if relevant == false && confidence >= 0.6
          inferred << "automatic_reply" if relevant == true && confidence >= 0.65 && profile.can_message == true
          inferred.uniq
        end
      end
    end
  end

app/services/ai/profile_comment_preparation_service.rb

0.0% lines covered

100.0% branches covered

329 relevant lines. 0 lines covered and 329 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module Ai
    # Builds the profile-level context summary that gates comment generation: collects and
    # analyzes recent posts, resolves face identities, refreshes the user profile and stores
    # a readiness summary on the profile's behavior-profile metadata.
    class ProfileCommentPreparationService
      DEFAULT_POSTS_LIMIT = 10
      DEFAULT_COMMENTS_LIMIT = 12
      MAX_POSTS_LIMIT = 20
      MIN_REQUIRED_ANALYZED_POSTS = 3
      CACHE_TTL = 30.minutes
      PREPARATION_VERSION = "profile_comment_preparation_v1".freeze

      # posts_limit is clamped to 1..MAX_POSTS_LIMIT and comments_limit to 1..20.
      # collector and post_analyzer are optional overrides for the default collaborators.
      def initialize(
        account:,
        profile:,
        posts_limit: DEFAULT_POSTS_LIMIT,
        comments_limit: DEFAULT_COMMENTS_LIMIT,
        analyze_missing_posts: true,
        collector: nil,
        post_analyzer: nil,
        user_profile_builder_service: UserProfileBuilderService.new,
        face_identity_resolution_service: FaceIdentityResolutionService.new
      )
        @account = account
        @profile = profile
        @posts_limit = posts_limit.to_i.clamp(1, MAX_POSTS_LIMIT)
        @comments_limit = comments_limit.to_i.clamp(1, 20)
        @analyze_missing_posts = ActiveModel::Type::Boolean.new.cast(analyze_missing_posts)
        @collector = collector
        @post_analyzer = post_analyzer
        @user_profile_builder_service = user_profile_builder_service
        @face_identity_resolution_service = face_identity_resolution_service
      end

      # Returns the preparation summary Hash (string keys).
      #
      # A cached summary is returned (with "from_cache" => true) when it is still valid and
      # +force+ is false. Otherwise the summary is rebuilt and persisted. Any StandardError
      # raised along the way produces - and persists - a "profile_preparation_failed" summary
      # instead of propagating.
      def prepare!(force: false)
        cached = read_cached_summary
        serve_from_cache = !force && cache_valid?(cached)
        if serve_from_cache
          cached_ready = ActiveModel::Type::Boolean.new.cast(cached["ready_for_comment_generation"])
          return cached.merge("from_cache" => true, "ready_for_comment_generation" => cached_ready)
        end

        recent_posts = load_recent_posts(collected_posts: collect_recent_posts)
        analysis = analyze_recent_posts!(recent_posts: recent_posts)
        resolve_identities_for_recent_posts!(recent_posts: recent_posts)
        @user_profile_builder_service.refresh!(profile: @profile)
        identity_consistency = build_identity_consistency
        readiness = build_readiness(
          analysis: analysis,
          identity_consistency: identity_consistency,
          recent_posts_count: recent_posts.length
        )

        summary = {
          "version" => PREPARATION_VERSION,
          "prepared_at" => Time.current.iso8601,
          "profile_id" => @profile.id,
          "instagram_account_id" => @account.id,
          "posts_limit" => @posts_limit,
          "comments_limit" => @comments_limit,
          "recent_posts_count" => recent_posts.length,
          "analysis" => analysis,
          "identity_consistency" => identity_consistency,
          "ready_for_comment_generation" => readiness[:ready],
          "reason_code" => readiness[:reason_code],
          "reason" => readiness[:reason]
        }
        persist_summary(summary)
        summary
      rescue StandardError => e
        failure = {
          "version" => PREPARATION_VERSION,
          "prepared_at" => Time.current.iso8601,
          "profile_id" => @profile&.id,
          "instagram_account_id" => @account&.id,
          "ready_for_comment_generation" => false,
          "reason_code" => "profile_preparation_failed",
          "reason" => e.message.to_s,
          "error_class" => e.class.name
        }
        persist_summary(failure)
        failure
      end

      private

      # Collects and persists recent posts through the collector; any error yields [].
      def collect_recent_posts
        source = @collector || Instagram::ProfileAnalysisCollector.new(account: @account, profile: @profile)
        collected = source.collect_and_persist!(posts_limit: @posts_limit, comments_limit: @comments_limit)
        Array(collected[:posts]).compact
      rescue StandardError
        []
      end

      # Persisted collected posts, or the profile's stored recent posts when none were
      # collected; newest first (by taken_at, then id), limited to @posts_limit.
      def load_recent_posts(collected_posts:)
        candidates = Array(collected_posts).select(&:persisted?)
        candidates = @profile.instagram_profile_posts.recent_first.limit(@posts_limit).to_a if candidates.empty?

        ordered = candidates.sort_by { |post| [ post.taken_at || Time.at(0), post.id.to_i ] }
        ordered.reverse.first(@posts_limit)
      end

      # Ensures each recent post is analyzed (running the analyzer when allowed) and tallies
      # analyzed / pending / failed posts and posts carrying structured signals.
      def analyze_recent_posts!(recent_posts:)
        analyzer = @post_analyzer || method(:analyze_post!)
        tally = { analyzed: 0, pending: 0, structured: 0 }
        failures = []

        recent_posts.each do |post|
          unless post_analyzed?(post)
            unless @analyze_missing_posts
              tally[:pending] += 1
              next
            end

            analyzer.call(post)
            post.reload
          end

          unless post_analyzed?(post)
            tally[:pending] += 1
            next
          end

          tally[:analyzed] += 1
          ensure_post_face_recognition!(post: post)
          tally[:structured] += 1 if post_has_structured_signals?(post)
        rescue StandardError => e
          failures << {
            "post_id" => post.id,
            "shortcode" => post.shortcode,
            "error" => e.message.to_s
          }
        end

        {
          "analyzed_posts_count" => tally[:analyzed],
          "pending_posts_count" => tally[:pending],
          "failed_posts_count" => failures.length,
          "failed_posts" => failures.first(12),
          "posts_with_structured_signals_count" => tally[:structured],
          "latest_posts_analyzed" => (tally[:pending].zero? && failures.empty?)
        }
      end

      # Default analyzer: runs the post analysis job inline with comment generation disabled.
      def analyze_post!(post)
        AnalyzeInstagramProfilePostJob.perform_now(
          instagram_account_id: @account.id,
          instagram_profile_id: @profile.id,
          instagram_profile_post_id: post.id,
          pipeline_mode: "inline",
          task_flags: { generate_comments: false }
        )
      end

      # A post counts as analyzed when its ai_status is "analyzed" and analyzed_at is set.
      def post_analyzed?(post)
        return false unless post.ai_status.to_s == "analyzed"

        post.analyzed_at.present?
      end

      # Runs face recognition for image posts that have no face rows yet; errors are ignored.
      def ensure_post_face_recognition!(post:)
        media = post.media
        return unless media.attached?
        return unless media.blob&.content_type.to_s.start_with?("image/")
        return if post.instagram_post_faces.exists?

        PostFaceRecognitionService.new.process!(post: post)
      rescue StandardError
        nil
      end

      # True when the post analysis has a description, topics, comment suggestions or entities.
      def post_has_structured_signals?(post)
        data = post.analysis
        return false unless data.is_a?(Hash)
        return true if data["image_description"].to_s.present?
        return true if Array(data["topics"]).any?
        return true if Array(data["comment_suggestions"]).any?

        entities = data["entities"]
        entities.is_a?(Hash) && entities.any?
      end

      # Resolves identities for every recent post that has face rows; a failure on one
      # post does not stop the others.
      def resolve_identities_for_recent_posts!(recent_posts:)
        recent_posts.each do |post|
          begin
            next unless post.instagram_post_faces.exists?

            @face_identity_resolution_service.resolve_for_post!(
              post: post,
              extracted_usernames: extracted_usernames_for_post(post),
              content_summary: post.analysis.is_a?(Hash) ? post.analysis : {}
            )
          rescue StandardError
            next
          end
        end
      end

      # Up to 20 unique handles taken from the analysis ("mentions", "profile_handles",
      # @-handles in "ocr_text") and from @-handles in the caption.
      def extracted_usernames_for_post(post)
        data = post.analysis.is_a?(Hash) ? post.analysis : {}
        handle_pattern = /@[a-zA-Z0-9._]{2,30}/
        candidates = Array(data["mentions"]) +
          Array(data["profile_handles"]) +
          data["ocr_text"].to_s.scan(handle_pattern) +
          post.caption.to_s.scan(handle_pattern)
        candidates.map { |entry| entry.to_s.strip }.reject(&:blank?).uniq.first(20)
      end

      # Checks whether the most frequently recognized person across the profile's post faces
      # appears often enough, dominates enough, and is linked to the profile owner.
      # Returns a string-keyed Hash; errors are reported as "identity_consistency_error".
      def build_identity_consistency
        face_counts = InstagramPostFace
          .joins(:instagram_profile_post)
          .where(instagram_profile_posts: { instagram_profile_id: @profile.id })
          .where.not(instagram_story_person_id: nil)
          .group(:instagram_story_person_id)
          .count
        total_faces = face_counts.values.sum.to_i

        if total_faces <= 0
          return {
            "consistent" => false,
            "reason_code" => "insufficient_face_data",
            "reason" => "No recognized faces found across analyzed posts.",
            "total_faces" => total_faces
          }
        end

        person_id, top_count = face_counts.max_by { |_id, count| count.to_i }
        appearances = top_count.to_i
        dominance_ratio = (appearances.to_f / total_faces.to_f).round(4)
        required_appearances = FaceIdentityResolutionService::MIN_PRIMARY_APPEARANCES
        required_ratio = FaceIdentityResolutionService::MIN_PRIMARY_RATIO

        person = @profile.instagram_story_people.find_by(id: person_id)
        linked_usernames = Array(person&.metadata&.dig("linked_usernames"))
          .map { |value| normalize_username(value) }
          .reject(&:blank?)
        profile_username = normalize_username(@profile.username)
        owner_match = [
          linked_usernames.include?(profile_username),
          normalize_username(person&.label) == profile_username,
          person&.role.to_s == "primary_user"
        ].any?

        consistent = appearances >= required_appearances &&
          dominance_ratio >= required_ratio &&
          owner_match
        reason_code = identity_reason_code(
          owner_match: owner_match,
          enough_appearances: appearances >= required_appearances,
          enough_dominance: dominance_ratio >= required_ratio
        )
        reason = "Primary identity is consistent across recent analyzed posts."
        reason = "Primary identity consistency requirements were not met (#{reason_code})." unless consistent

        {
          "consistent" => consistent,
          "reason_code" => reason_code,
          "reason" => reason,
          "primary_person_id" => person_id,
          "primary_role" => person&.role.to_s.presence,
          "appearance_count" => appearances,
          "total_faces" => total_faces,
          "dominance_ratio" => dominance_ratio,
          "linked_usernames" => linked_usernames.first(10)
        }.compact
      rescue StandardError => e
        {
          "consistent" => false,
          "reason_code" => "identity_consistency_error",
          "reason" => e.message.to_s,
          "error_class" => e.class.name
        }
      end

      # First unmet identity requirement, checked in order: owner link, appearances, dominance.
      def identity_reason_code(owner_match:, enough_appearances:, enough_dominance:)
        return "primary_identity_not_linked_to_profile" unless owner_match
        return "insufficient_primary_appearances" unless enough_appearances
        return "identity_majority_not_confirmed" unless enough_dominance

        "identity_consistent"
      end

      # Applies the readiness gates in order and returns { ready:, reason_code:, reason: }
      # for the first gate that fails, or the ready result when all pass.
      def build_readiness(analysis:, identity_consistency:, recent_posts_count:)
        analysis_data = analysis.is_a?(Hash) ? analysis : {}
        identity_data = identity_consistency.is_a?(Hash) ? identity_consistency : {}
        to_bool = ActiveModel::Type::Boolean.new
        posts_count = recent_posts_count.to_i
        required_analyzed = [ posts_count, MIN_REQUIRED_ANALYZED_POSTS ].min
        not_ready = ->(code, text) { { ready: false, reason_code: code, reason: text } }

        if posts_count <= 0
          return not_ready.call(
            "no_recent_posts_available",
            "No recent posts are available to build verified profile context."
          )
        end

        unless to_bool.cast(analysis_data["latest_posts_analyzed"])
          return not_ready.call("latest_posts_not_analyzed", "Latest posts have not been fully analyzed yet.")
        end

        if analysis_data["analyzed_posts_count"].to_i < required_analyzed
          return not_ready.call(
            "insufficient_analyzed_posts",
            "Insufficient analyzed posts for reliable historical context."
          )
        end

        if analysis_data["posts_with_structured_signals_count"].to_i <= 0
          return not_ready.call(
            "missing_structured_post_signals",
            "Recent posts do not contain enough structured metadata for grounded comments."
          )
        end

        unless to_bool.cast(identity_data["consistent"])
          return not_ready.call(
            identity_data["reason_code"].to_s.presence || "identity_consistency_not_confirmed",
            identity_data["reason"].to_s.presence || "Identity consistency could not be confirmed."
          )
        end

        {
          ready: true,
          reason_code: "profile_context_ready",
          reason: "Profile history, latest post analysis, and identity consistency verified."
        }
      end

      # Stored preparation summary from the behavior-profile metadata, or {} when absent.
      def read_cached_summary
        metadata = @profile.instagram_profile_behavior_profile&.metadata
        return {} unless metadata.is_a?(Hash)

        stored = metadata["comment_generation_preparation"]
        stored.is_a?(Hash) ? stored : {}
      end

      # Valid when the summary has a parseable "prepared_at" within CACHE_TTL and the
      # current PREPARATION_VERSION.
      def cache_valid?(summary)
        prepared_at = parse_time(summary["prepared_at"])
        return false if prepared_at.nil? || prepared_at == false
        return false unless summary["version"].to_s == PREPARATION_VERSION

        prepared_at >= CACHE_TTL.ago
      end

      # Writes the summary under "comment_generation_preparation" on the behavior profile
      # (created when missing); failures to save are ignored.
      def persist_summary(summary)
        record = InstagramProfileBehaviorProfile.find_or_initialize_by(instagram_profile: @profile)
        merged = record.metadata.is_a?(Hash) ? record.metadata.deep_dup : {}
        merged["comment_generation_preparation"] = summary
        record.metadata = merged
        record.activity_score = record.activity_score.to_f
        record.behavioral_summary = {} unless record.behavioral_summary.is_a?(Hash)
        record.save!
      rescue StandardError
        nil
      end

      # Parses a timestamp with Time.zone; blank or unparseable input gives nil.
      def parse_time(value)
        text = value.to_s
        return nil if text.blank?

        Time.zone.parse(text)
      rescue StandardError
        nil
      end

      # Stripped, lower-cased username without a leading "@"; nil when nothing is left.
      def normalize_username(value)
        value.to_s.strip.downcase.delete_prefix("@").presence
      end
    end
  end

app/services/ai/profile_demographics_aggregator.rb

0.0% lines covered

100.0% branches covered

178 relevant lines. 0 lines covered and 178 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "json"
  module Ai
    # Consolidates demographic hints (age, gender, location) from accumulated analysis JSON,
    # first through an LLM call and, when that yields nothing usable or raises, through a
    # simple statistical fallback over the dataset itself.
    class ProfileDemographicsAggregator
      DEFAULT_MODEL = "mistral:7b".freeze

      # account - stored on the instance; not read by the code in this class.
      # model   - model name; DEFAULT_MODEL is used when blank.
      def initialize(account:, model: nil)
        @account = account
        @model = model.to_s.presence || DEFAULT_MODEL
      end

      # Returns a Hash with :ok, :source, :profile_inference and :post_inferences
      # (plus :error on the fallback path). Never raises for a StandardError from the LLM
      # path: the heuristic fallback is used instead.
      def aggregate!(dataset:)
        normalized = normalize_result(call_aggregator_llm(dataset: dataset))
        return normalized if normalized[:ok]

        heuristic_fallback(dataset: dataset, error: normalized[:error])
      rescue StandardError => e
        heuristic_fallback(dataset: dataset, error: e.message)
      end

      private

      # Asks the local client for a JSON aggregation; returns the parsed Hash or nil.
      def call_aggregator_llm(dataset:)
        client = local_client
        return nil unless client

        resp = client.generate_text_json!(
          model: @model,
          prompt: build_prompt(dataset: dataset),
          temperature: 0.1,
          max_output_tokens: 1600,
          usage_category: "report_generation",
          usage_context: { workflow: "profile_demographics_aggregator" }
        )
        resp[:json].is_a?(Hash) ? resp[:json] : nil
      end

      def local_client
        Ai::LocalMicroserviceClient.new
      end

      # Prompt text: task description, the expected output schema and the dataset as JSON.
      def build_prompt(dataset:)
        <<~PROMPT
          You are an AI aggregation engine that consolidates structured JSON analyses over time.
          Task:
          - Combine profile-level and post-level analysis JSON.
          - Infer missing demographics cautiously: age, gender, location.
          - Prefer explicit self-declared evidence over weak assumptions.
          - Confidence must be 0.0 to 1.0.
          - If evidence is weak, return null with low confidence.
          Output STRICT JSON only with this schema:
          {
          "profile_inference": {
          "age": 0,
          "age_range": "",
          "age_confidence": 0.0,
          "gender": "",
          "gender_indicators": [],
          "gender_confidence": 0.0,
          "location": "",
          "location_signals": [],
          "location_confidence": 0.0,
          "evidence": "",
          "why": ""
          },
          "post_inferences": [
          {
          "shortcode": "",
          "source_type": "",
          "source_ref": "",
          "age": 0,
          "gender": "",
          "location": "",
          "confidence": 0.0,
          "evidence": "",
          "relevant": true
          }
          ]
          }
          INPUT_DATASET_JSON:
          #{JSON.pretty_generate(dataset)}
        PROMPT
      end

      # Coerces the raw LLM Hash into the result shape; a non-Hash input is reported as
      # { ok: false, error: "aggregator_response_blank" }. Post entries without a
      # shortcode are dropped.
      def normalize_result(raw)
        return { ok: false, error: "aggregator_response_blank" } unless raw.is_a?(Hash)

        profile_raw = raw["profile_inference"].is_a?(Hash) ? raw["profile_inference"] : {}
        post_raw = Array(raw["post_inferences"]).select { |entry| entry.is_a?(Hash) }

        profile_inference = {
          age: integer_or_nil(profile_raw["age"]),
          age_range: clean_text(profile_raw["age_range"]),
          age_confidence: float_or_nil(profile_raw["age_confidence"]),
          gender: clean_text(profile_raw["gender"]),
          gender_indicators: Array(profile_raw["gender_indicators"]).map { |v| clean_text(v) }.compact.first(6),
          gender_confidence: float_or_nil(profile_raw["gender_confidence"]),
          location: clean_text(profile_raw["location"]),
          location_signals: Array(profile_raw["location_signals"]).map { |v| clean_text(v) }.compact.first(8),
          location_confidence: float_or_nil(profile_raw["location_confidence"]),
          evidence: clean_text(profile_raw["evidence"]),
          why: clean_text(profile_raw["why"])
        }

        post_inferences = post_raw.filter_map do |entry|
          shortcode = clean_text(entry["shortcode"])
          next if shortcode.blank?

          {
            shortcode: shortcode,
            source_type: clean_text(entry["source_type"]),
            source_ref: clean_text(entry["source_ref"]),
            age: integer_or_nil(entry["age"]),
            gender: clean_text(entry["gender"]),
            location: clean_text(entry["location"]),
            confidence: float_or_nil(entry["confidence"]),
            evidence: clean_text(entry["evidence"]),
            relevant: ActiveModel::Type::Boolean.new.cast(entry["relevant"])
          }
        end

        {
          ok: true,
          source: "json_aggregator_llm",
          profile_inference: profile_inference,
          post_inferences: post_inferences
        }
      end

      # Last-resort consolidation: median age, most frequent gender/location and a
      # count-based confidence, computed from the dataset's analysis pool.
      #
      # This runs from aggregate!'s rescue path, so it must not raise on odd input: a missing
      # or non-Hash dataset, string or symbol keys, a single Hash instead of a list, and
      # non-Hash rows are all tolerated (unusable parts simply contribute nothing).
      def heuristic_fallback(dataset:, error: nil)
        rows = demographic_rows(dataset, :profile_demographics) + demographic_rows(dataset, :post_demographics)
        ages = rows.filter_map { |row| integer_or_nil(row["age"] || row[:age]) }
        genders = rows.filter_map { |row| clean_text(row["gender"] || row[:gender]) }
        locations = rows.filter_map { |row| clean_text(row["location"] || row[:location]) }
        error_text = error.to_s.presence

        profile_inference = {
          age: median(ages),
          age_range: ages.any? ? "#{ages.min}-#{ages.max}" : nil,
          age_confidence: confidence_from_count(ages.length),
          gender: mode(genders),
          gender_indicators: most_frequent(genders, 4),
          gender_confidence: confidence_from_count(genders.length),
          location: mode(locations),
          location_signals: most_frequent(locations, 5),
          location_confidence: confidence_from_count(locations.length),
          evidence: "Heuristic consolidation from accumulated analysis JSON.",
          why: error_text
        }

        {
          ok: true,
          source: "heuristic_fallback",
          profile_inference: profile_inference,
          post_inferences: [],
          error: error_text
        }
      end

      # Hash rows stored under analysis_pool/<key>; accepts symbol or string keys and wraps
      # a single Hash so Array() does not explode it into key/value pairs.
      def demographic_rows(dataset, key)
        rows = lookup(lookup(dataset, :analysis_pool), key)
        rows = [ rows ] if rows.is_a?(Hash)
        Array(rows).select { |row| row.is_a?(Hash) }
      end

      # Reads +key+ (symbol first, then its string form) from +container+; nil for non-Hashes.
      def lookup(container, key)
        return nil unless container.is_a?(Hash)

        container.key?(key) ? container[key] : container[key.to_s]
      end

      # Distinct values ordered by descending frequency, limited to +limit+ entries.
      def most_frequent(values, limit)
        values.tally.sort_by { |_value, count| -count }.first(limit).map(&:first)
      end

      # Integer(value), or nil when the value is blank or not convertible.
      def integer_or_nil(value)
        return nil if value.blank?

        Integer(value)
      rescue StandardError
        nil
      end

      # Float(value) clamped to 0.0..1.0, or nil when the value is blank or not convertible.
      def float_or_nil(value)
        return nil if value.blank?

        Float(value).clamp(0.0, 1.0)
      rescue StandardError
        nil
      end

      # Stripped string form of +value+, or nil when nothing is left.
      def clean_text(value)
        value.to_s.strip.presence
      end

      # Most frequent non-blank value (first seen wins ties), or nil for no values.
      def mode(values)
        present = Array(values).reject(&:blank?)
        return nil if present.empty?

        present.tally.max_by { |_value, count| count }&.first
      end

      # Median of the non-nil values; for an even count, the rounded mean of the middle pair.
      def median(values)
        sorted = Array(values).compact.sort
        return nil if sorted.empty?

        mid = sorted.length / 2
        return sorted[mid] if sorted.length.odd?

        ((sorted[mid - 1] + sorted[mid]) / 2.0).round
      end

      # 0.25 plus 0.1 per observation, capped at 0.8; nil when there are no observations.
      def confidence_from_count(count)
        return nil if count.to_i <= 0

        [ 0.25 + (count.to_i * 0.1), 0.8 ].min.round(2)
      end
    end
  end

app/services/ai/profile_history_build_service.rb

0.0% lines covered

100.0% branches covered

782 relevant lines. 0 lines covered and 782 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. module Ai
  2. class ProfileHistoryBuildService
  3. TARGET_ANALYZED_POSTS = 20
  4. TARGET_CAPTURED_POSTS = 50
  5. COLLECTION_COMMENTS_LIMIT = 20
  6. FACE_RECENCY_REFRESH_DAYS = 7
  7. FACE_REFRESH_MAX_ENQUEUE_PER_RUN = ENV.fetch("PROFILE_HISTORY_FACE_REFRESH_MAX_ENQUEUE_PER_RUN", "6").to_i.clamp(1, 20)
  8. FACE_REFRESH_PENDING_WINDOW_HOURS = ENV.fetch("PROFILE_HISTORY_FACE_REFRESH_PENDING_WINDOW_HOURS", "6").to_i.clamp(1, 24)
  9. FACE_VERIFICATION_MIN_APPEARANCES = FaceIdentityResolutionService::MIN_PRIMARY_APPEARANCES
  10. FACE_VERIFICATION_MIN_RATIO = FaceIdentityResolutionService::MIN_PRIMARY_RATIO
  11. PROFILE_INCOMPLETE_REASON_CODES =
  12. if defined?(ProcessPostMetadataTaggingJob::PROFILE_INCOMPLETE_REASON_CODES)
  13. ProcessPostMetadataTaggingJob::PROFILE_INCOMPLETE_REASON_CODES
  14. else
  15. %w[
  16. latest_posts_not_analyzed
  17. insufficient_analyzed_posts
  18. no_recent_posts_available
  19. missing_structured_post_signals
  20. profile_preparation_failed
  21. profile_preparation_error
  22. ].freeze
  23. end
  # Stores the collaborators for a history build run.
  # collector is an optional override; a face identity resolution service is created
  # by default when none is supplied.
  def initialize(account:, profile:, collector: nil, face_identity_resolution_service: FaceIdentityResolutionService.new)
    @account, @profile = account, profile
    @collector = collector
    @face_identity_resolution_service = face_identity_resolution_service
  end
  # Runs one history build pass for the profile and returns the result of persist_and_result!.
  #
  # The pass is reported as "blocked" when the scan policy asks to skip post analysis, as
  # "ready" when every capture check passes, no queue work is outstanding, the preparation
  # summary is ready and the face verification is confirmed, and as "pending" otherwise.
  # Any StandardError is converted into a pending "history_build_failed" result.
  def execute!
    to_bool = ActiveModel::Type::Boolean.new
    policy = Instagram::ProfileScanPolicy.new(profile: @profile).decision

    if to_bool.cast(policy[:skip_post_analysis])
      return persist_and_result!(
        status: "blocked",
        ready: false,
        reason_code: policy[:reason_code].to_s.presence || "profile_scan_policy_blocked",
        reason: policy[:reason].to_s.presence || "Profile is blocked by scan policy.",
        checks: default_checks,
        queue_state: default_queue_state,
        preparation: {},
        face_verification: default_face_verification,
        conversation: default_conversation_state(ready: false)
      )
    end

    collection = collect_posts
    latest_50_posts = active_posts_scope.recent_first.limit(TARGET_CAPTURED_POSTS).to_a
    latest_20_posts = active_posts_scope.recent_first.limit(TARGET_ANALYZED_POSTS).to_a
    checks = build_capture_checks(
      collection: collection,
      latest_50_posts: latest_50_posts,
      latest_20_posts: latest_20_posts
    )
    download_queue = queue_missing_media_downloads(posts: latest_50_posts)
    analysis_queue = queue_missing_post_analysis(posts: latest_20_posts)
    preparation = prepare_history_summary(latest_20_posts: latest_20_posts)
    face_verification = verify_face_identity(latest_posts: latest_50_posts)
    queue_state = build_queue_state(
      download_queue: download_queue,
      analysis_queue: analysis_queue,
      face_refresh_queue: face_refresh_queue_state(face_verification: face_verification)
    )

    # Any positive counter below means background work is still outstanding.
    pending_counters = %w[
      downloads_queued downloads_pending
      analyses_queued analyses_pending
      face_refresh_queued face_refresh_pending face_refresh_deferred
    ]
    queue_work_pending = pending_counters.any? { |counter| queue_state[counter].to_i.positive? }

    preparation_ready = to_bool.cast(preparation["ready_for_comment_generation"])
    face_ready = to_bool.cast(face_verification["confirmed"])
    checks_ready = checks.values.all? { |row| to_bool.cast(row["ready"]) }
    history_ready = checks_ready && !queue_work_pending && preparation_ready && face_ready

    reason_code, reason = resolve_reason(
      checks: checks,
      queue_state: queue_state,
      preparation: preparation,
      face_verification: face_verification,
      history_ready: history_ready
    )
    conversation = build_conversation_state(ready: history_ready)

    persist_and_result!(
      status: history_ready ? "ready" : "pending",
      ready: history_ready,
      reason_code: reason_code,
      reason: reason,
      checks: checks,
      queue_state: queue_state,
      preparation: preparation,
      face_verification: face_verification,
      conversation: conversation
    )
  rescue StandardError => e
    failure_message = e.message.to_s
    persist_and_result!(
      status: "pending",
      ready: false,
      reason_code: "history_build_failed",
      reason: failure_message,
      checks: default_checks,
      queue_state: default_queue_state,
      preparation: {
        "ready_for_comment_generation" => false,
        "reason_code" => "profile_preparation_error",
        "reason" => failure_message
      },
      face_verification: default_face_verification,
      conversation: default_conversation_state(ready: false)
    )
  end
  112. private
  # Collects and persists the profile's posts through the injected collector, or a
  # freshly built Instagram::ProfileAnalysisCollector when none was injected.
  #
  # @return [Object] whatever the collector returns; on any StandardError a hash with
  #   an empty :feed_fetch and a :collection_error string under :summary
  def collect_posts
    active_collector = @collector || Instagram::ProfileAnalysisCollector.new(account: @account, profile: @profile)
    options = {
      posts_limit: nil,
      comments_limit: COLLECTION_COMMENTS_LIMIT,
      track_missing_as_deleted: true,
      sync_source: "profile_history_build",
      download_media: false
    }
    active_collector.collect_and_persist!(**options)
  rescue StandardError => e
    failure_summary = { feed_fetch: {}, collection_error: "#{e.class}: #{e.message}" }
    { summary: failure_summary }
  end
  # Builds the three readiness checks reported in the history state.
  #
  # @param collection [Hash, Object] collector result; its :summary is read when it is a Hash
  # @param latest_50_posts [Array] most recent posts considered for the capture check
  # @param latest_20_posts [Array] most recent posts considered for the analysis check
  # @return [Hash] string-keyed hash with "all_posts_captured", "latest_50_captured"
  #   and "latest_20_analyzed" entries, each carrying "ready" and "reason_code"
  def build_capture_checks(collection:, latest_50_posts:, latest_20_posts:)
    boolean = ActiveModel::Type::Boolean.new

    summary = collection.is_a?(Hash) ? collection[:summary] : {}
    summary = {} unless summary.is_a?(Hash)

    # The feed summary may be keyed by symbol or by string; prefer the symbol form.
    symbol_feed = summary[:feed_fetch]
    string_feed = summary["feed_fetch"]
    feed_fetch = symbol_feed.is_a?(Hash) ? symbol_feed : {}
    feed_fetch = string_feed if feed_fetch.blank? && string_feed.is_a?(Hash)
    feed_fetch ||= {}

    more_available = boolean.cast(feed_fetch["more_available"] || feed_fetch[:more_available])
    collection_error = summary[:collection_error].to_s.presence || summary["collection_error"].to_s.presence
    feed_available = feed_fetch.present?
    all_posts_captured = feed_available && !more_available && collection_error.blank?

    active_count = active_posts_scope.count

    # Shared rule for the two "latest N" checks.
    reason_for = lambda do |expected, ready, not_ready_code|
      next "no_recent_posts_available" if expected.zero?

      ready ? "ok" : not_ready_code
    end

    expected_50 = [ active_count, TARGET_CAPTURED_POSTS ].min
    latest_50_ready = expected_50.positive? && latest_50_posts.length >= expected_50

    expected_20 = [ active_count, TARGET_ANALYZED_POSTS ].min
    analyzed_recent_20 = latest_20_posts.count { |post| post_analyzed?(post) }
    latest_20_ready = expected_20.positive? && analyzed_recent_20 >= expected_20

    capture_row = {
      "ready" => all_posts_captured,
      "reason_code" => all_posts_captured ? "ok" : "all_posts_not_yet_captured",
      "captured_posts_count" => active_count,
      "more_available" => more_available,
      "source" => feed_fetch["source"] || feed_fetch[:source],
      "pages_fetched" => feed_fetch["pages_fetched"] || feed_fetch[:pages_fetched],
      "feed_available" => feed_available,
      "collection_error" => collection_error
    }.compact

    {
      "all_posts_captured" => capture_row,
      "latest_50_captured" => {
        "ready" => latest_50_ready,
        "reason_code" => reason_for.call(expected_50, latest_50_ready, "latest_50_posts_not_captured"),
        "expected_count" => expected_50,
        "captured_count" => latest_50_posts.length
      },
      "latest_20_analyzed" => {
        "ready" => latest_20_ready,
        "reason_code" => reason_for.call(expected_20, latest_20_ready, "latest_posts_not_analyzed"),
        "expected_count" => expected_20,
        "analyzed_count" => analyzed_recent_20
      }
    }
  end
  # Enqueues a media download job for every post that has no attached media yet.
  #
  # Posts that are not downloadable are counted as skipped; posts whose download is
  # already in flight are counted as pending. A job is only counted as queued when
  # perform_later actually returned a job: ActiveJob returns false when the enqueue
  # did not succeed, and that case is recorded as a failure instead.
  #
  # @param posts [Array, nil] candidate posts (nil entries are ignored)
  # @return [Hash] :queued_count, :pending_count, :skipped_count, :queued_post_ids, :failures
  def queue_missing_media_downloads(posts:)
    queued_count = 0
    pending_count = 0
    skipped_count = 0
    failures = []
    post_ids = []

    Array(posts).each do |post|
      next unless post
      next if post.media.attached?

      unless downloadable_post?(post)
        skipped_count += 1
        next
      end

      if media_download_in_flight?(post)
        pending_count += 1
        next
      end

      job = DownloadInstagramProfilePostMediaJob.perform_later(
        instagram_account_id: @account.id,
        instagram_profile_id: @profile.id,
        instagram_profile_post_id: post.id,
        trigger_analysis: true
      )

      # A falsy job means nothing was enqueued; do not report it as queued.
      unless job
        failures << {
          "instagram_profile_post_id" => post.id,
          "shortcode" => post.shortcode.to_s.presence,
          "error_class" => "EnqueueFailed",
          "error_message" => "Media download job was not enqueued."
        }.compact
        next
      end

      queued_count += 1
      post_ids << post.id
      mark_history_build_metadata!(
        post: post,
        attributes: {
          "media_download_job_id" => job.job_id,
          "media_download_queued_at" => Time.current.iso8601(3)
        }
      )
    rescue StandardError => e
      failures << {
        "instagram_profile_post_id" => post&.id,
        "shortcode" => post&.shortcode.to_s.presence,
        "error_class" => e.class.name,
        # byteslice may cut a multi-byte character; scrub keeps the text valid UTF-8.
        "error_message" => e.message.to_s.byteslice(0, 220).scrub("")
      }.compact
    end

    {
      queued_count: queued_count,
      pending_count: pending_count,
      skipped_count: skipped_count,
      queued_post_ids: post_ids,
      failures: failures
    }
  end
  # Enqueues an analysis job for every post that is not analyzed yet.
  #
  # Already analyzed posts are counted as skipped. Posts without attached media, or
  # with an analysis already in flight, are counted as pending. A job is only counted
  # as queued when perform_later actually returned a job: ActiveJob returns false when
  # the enqueue did not succeed, and that case is recorded as a failure instead.
  #
  # @param posts [Array, nil] candidate posts (nil entries are ignored)
  # @return [Hash] :queued_count, :pending_count, :skipped_count, :queued_post_ids, :failures
  def queue_missing_post_analysis(posts:)
    queued_count = 0
    pending_count = 0
    skipped_count = 0
    failures = []
    post_ids = []

    Array(posts).each do |post|
      next unless post

      if post_analyzed?(post)
        skipped_count += 1
        next
      end

      # Analysis needs the media; until it is attached the post just waits.
      unless post.media.attached?
        pending_count += 1
        next
      end

      if post_analysis_in_flight?(post)
        pending_count += 1
        next
      end

      job = AnalyzeInstagramProfilePostJob.perform_later(
        instagram_account_id: @account.id,
        instagram_profile_id: @profile.id,
        instagram_profile_post_id: post.id,
        task_flags: {
          generate_comments: false,
          enforce_comment_evidence_policy: false,
          retry_on_incomplete_profile: false
        }
      )

      # A falsy job means nothing was enqueued; do not report it as queued.
      unless job
        failures << {
          "instagram_profile_post_id" => post.id,
          "shortcode" => post.shortcode.to_s.presence,
          "error_class" => "EnqueueFailed",
          "error_message" => "Post analysis job was not enqueued."
        }.compact
        next
      end

      queued_count += 1
      post_ids << post.id
      mark_history_build_metadata!(
        post: post,
        attributes: {
          "post_analysis_job_id" => job.job_id,
          "post_analysis_queued_at" => Time.current.iso8601(3)
        }
      )
    rescue StandardError => e
      failures << {
        "instagram_profile_post_id" => post&.id,
        "shortcode" => post&.shortcode.to_s.presence,
        "error_class" => e.class.name,
        # byteslice may cut a multi-byte character; scrub keeps the text valid UTF-8.
        "error_message" => e.message.to_s.byteslice(0, 220).scrub("")
      }.compact
    end

    {
      queued_count: queued_count,
      pending_count: pending_count,
      skipped_count: skipped_count,
      queued_post_ids: post_ids,
      failures: failures
    }
  end
  # Flattens the three queue summaries into the string-keyed hash stored in the
  # history state. Counters are coerced with to_i; failure lists are capped at 20.
  #
  # NOTE(review): "analysis_failures" is filled from the download queue's failures;
  # the key is kept as-is because default_queue_state uses the same name — confirm
  # with consumers whether it is meant to describe downloads.
  #
  # @param download_queue [Hash] result of queue_missing_media_downloads
  # @param analysis_queue [Hash] result of queue_missing_post_analysis
  # @param face_refresh_queue [Hash] symbol-keyed face refresh counters
  # @return [Hash]
  def build_queue_state(download_queue:, analysis_queue:, face_refresh_queue: {})
    counter = ->(queue, key) { queue[key].to_i }
    capped_failures = ->(queue) { Array(queue[:failures]).first(20) }

    {
      "downloads_queued" => counter.call(download_queue, :queued_count),
      "downloads_pending" => counter.call(download_queue, :pending_count),
      "downloads_skipped" => counter.call(download_queue, :skipped_count),
      "analysis_failures" => capped_failures.call(download_queue),
      "analyses_queued" => counter.call(analysis_queue, :queued_count),
      "analyses_pending" => counter.call(analysis_queue, :pending_count),
      "analyses_skipped" => counter.call(analysis_queue, :skipped_count),
      "analysis_queue_failures" => capped_failures.call(analysis_queue),
      "face_refresh_queued" => counter.call(face_refresh_queue, :queued_count),
      "face_refresh_pending" => counter.call(face_refresh_queue, :pending_count),
      "face_refresh_deferred" => counter.call(face_refresh_queue, :deferred_count),
      "face_refresh_failures" => capped_failures.call(face_refresh_queue)
    }
  end
  # Runs the comment-preparation service over the already-loaded recent posts.
  #
  # The posts are handed over through ExistingPostsCollector, and the service is
  # invoked with analyze_missing_posts: false and prepare!(force: true).
  #
  # @param latest_20_posts [Array] posts to feed to the preparation service
  # @return [Object] the service's result; on any StandardError a string-keyed hash
  #   with reason_code "profile_preparation_error"
  def prepare_history_summary(latest_20_posts:)
    in_memory_collector = ExistingPostsCollector.new(posts: latest_20_posts)
    service = Ai::ProfileCommentPreparationService.new(
      account: @account,
      profile: @profile,
      posts_limit: TARGET_ANALYZED_POSTS,
      comments_limit: COLLECTION_COMMENTS_LIMIT,
      analyze_missing_posts: false,
      collector: in_memory_collector
    )
    service.prepare!(force: true)
  rescue StandardError => e
    failure = { "ready_for_comment_generation" => false, "reason_code" => "profile_preparation_error" }
    failure["reason"] = e.message.to_s
    failure["error_class"] = e.class.name
    failure
  end
  # Refreshes stale face data for recent posts and decides whether the detected
  # faces confirm the profile owner's identity.
  #
  # For every analyzed post with attached media: when a face refresh is required it
  # is counted as pending (already in flight), deferred (per-run enqueue cap reached)
  # or enqueued; otherwise identities are resolved for posts that have faces. The
  # verdict is then computed from per-person face counts across the profile's posts.
  #
  # Changes from the previous version:
  # * a face is only treated as an owner match by label when the profile actually
  #   has a username (previously two blank values compared equal and matched);
  # * when an error interrupts verification, the refresh-queue counters gathered so
  #   far are still reported, because those jobs were really enqueued;
  # * truncated error messages are scrubbed so they remain valid UTF-8.
  #
  # @param latest_posts [Array, nil] recent posts to inspect
  # @return [Hash] string-keyed verdict with "confirmed", "reason_code", "reason",
  #   face counters, "combined_faces" (max 12) and "refresh_queue"
  def verify_face_identity(latest_posts:)
    refresh_queue = {
      "queued_count" => 0,
      "pending_count" => 0,
      "deferred_count" => 0,
      "failures" => []
    }

    eligible_posts = Array(latest_posts).select { |post| post_analyzed?(post) && post.media.attached? }
    eligible_posts.each do |post|
      unless face_refresh_required?(post: post)
        resolve_identity_for_post!(post: post) if post.instagram_post_faces.exists?
        next
      end

      if face_refresh_in_flight?(post: post)
        refresh_queue["pending_count"] += 1
        next
      end

      if refresh_queue["queued_count"] >= FACE_REFRESH_MAX_ENQUEUE_PER_RUN
        refresh_queue["deferred_count"] += 1
        next
      end

      enqueue_state = enqueue_face_refresh_for_post(post: post)
      if enqueue_state[:queued]
        refresh_queue["queued_count"] += 1
      else
        refresh_queue["failures"] << {
          "instagram_profile_post_id" => post.id,
          "shortcode" => post.shortcode.to_s.presence,
          "error_class" => enqueue_state[:error_class].to_s.presence || "enqueue_failed",
          "error_message" => enqueue_state[:error_message].to_s.byteslice(0, 220).scrub("")
        }.compact
      end
    end
    refresh_queue["failures"] = Array(refresh_queue["failures"]).first(20)

    # Appearances per identified person across all of this profile's posts.
    counts = InstagramPostFace.joins(:instagram_profile_post)
      .where(instagram_profile_posts: { instagram_profile_id: @profile.id })
      .where.not(instagram_story_person_id: nil)
      .group(:instagram_story_person_id)
      .count
    total_faces = counts.values.sum.to_i

    if total_faces <= 0
      return {
        "confirmed" => false,
        "reason_code" => "insufficient_face_data",
        "reason" => "No detected faces were available for identity verification.",
        "total_faces" => 0,
        "reference_face_count" => 0,
        "dominance_ratio" => 0.0,
        "combined_faces" => [],
        "refresh_queue" => refresh_queue
      }
    end

    profile_username = normalize_username(@profile.username)
    people = @profile.instagram_story_people.where(id: counts.keys).index_by(&:id)

    combined = counts.sort_by { |_id, count| -count.to_i }.map do |person_id, appearances|
      person = people[person_id]
      linked = linked_usernames_for(person)
      label_username = normalize_username(person&.label)
      # Guard against nil == nil: a missing label must not match a missing username.
      label_match = !profile_username.nil? && label_username == profile_username
      owner_match = linked.include?(profile_username) || label_match || person&.role.to_s == "primary_user"

      {
        "person_id" => person_id,
        "label" => person&.display_label.to_s.presence || "person_#{person_id}",
        "role" => person&.role.to_s.presence || "unknown",
        "appearances" => appearances.to_i,
        "linked_usernames" => linked,
        "owner_match" => owner_match
      }
    end

    reference_face_count = combined.sum { |row| row["owner_match"] ? row["appearances"].to_i : 0 }
    dominance_ratio = (reference_face_count.to_f / total_faces.to_f).round(4)
    enough_appearances = reference_face_count >= FACE_VERIFICATION_MIN_APPEARANCES
    confirmed = enough_appearances && dominance_ratio >= FACE_VERIFICATION_MIN_RATIO

    reason_code =
      if confirmed
        "identity_confirmed"
      elsif !enough_appearances
        "insufficient_reference_face_appearances"
      else
        "identity_match_ratio_too_low"
      end
    reason =
      if confirmed
        "Reference face verification confirms this face belongs to @#{@profile.username}."
      else
        "Reference face verification did not reach the required confidence threshold."
      end

    {
      "confirmed" => confirmed,
      "reason_code" => reason_code,
      "reason" => reason,
      "total_faces" => total_faces,
      "reference_face_count" => reference_face_count,
      "dominance_ratio" => dominance_ratio,
      "combined_faces" => combined.first(12),
      "refresh_queue" => refresh_queue
    }
  rescue StandardError => e
    # Keep whatever was already enqueued visible to the caller; fall back to an
    # empty queue only if the failure happened before the counters existed.
    surviving_queue =
      if refresh_queue.is_a?(Hash)
        refresh_queue.merge("failures" => Array(refresh_queue["failures"]).first(20))
      else
        { "queued_count" => 0, "pending_count" => 0, "deferred_count" => 0, "failures" => [] }
      end

    {
      "confirmed" => false,
      "reason_code" => "face_verification_error",
      "reason" => e.message.to_s,
      "error_class" => e.class.name,
      "total_faces" => 0,
      "reference_face_count" => 0,
      "dominance_ratio" => 0.0,
      "combined_faces" => [],
      "refresh_queue" => surviving_queue
    }
  end
  # Picks the [reason_code, reason] pair that explains the current history state.
  #
  # Checks are evaluated in a fixed priority order: capture checks, outstanding
  # downloads, outstanding analyses, outstanding face refreshes, the analysis check,
  # profile preparation, then face verification. The first unmet condition wins.
  #
  # @return [Array(String, String)] reason code and human-readable reason
  def resolve_reason(checks:, queue_state:, preparation:, face_verification:, history_ready:)
    return [ "history_ready", "History build completed and identity verified." ] if history_ready

    boolean = ActiveModel::Type::Boolean.new
    check_ready = ->(name) { boolean.cast(checks.dig(name, "ready")) }
    check_code = ->(name) { checks.dig(name, "reason_code").to_s }
    outstanding = ->(*keys) { keys.any? { |key| queue_state[key].to_i.positive? } }
    no_recent_posts = [ "no_recent_posts_available", "No recent posts are available for history verification." ]

    unless check_ready.call("all_posts_captured")
      return [ "all_posts_not_yet_captured", "All posts have not been captured yet." ]
    end

    unless check_ready.call("latest_50_captured")
      return no_recent_posts if check_code.call("latest_50_captured") == "no_recent_posts_available"

      return [ "latest_50_posts_not_captured", "Latest 50 posts have not been fully captured yet." ]
    end

    if outstanding.call("downloads_queued", "downloads_pending")
      return [ "waiting_for_media_downloads", "Waiting for media downloads to complete before verification." ]
    end

    if outstanding.call("analyses_queued", "analyses_pending")
      return [ "latest_posts_not_analyzed", "Waiting for latest posts to finish analysis." ]
    end

    if outstanding.call("face_refresh_queued", "face_refresh_pending", "face_refresh_deferred")
      return [ "waiting_for_face_refresh", "Waiting for face refresh tasks to complete before verification." ]
    end

    unless check_ready.call("latest_20_analyzed")
      return no_recent_posts if check_code.call("latest_20_analyzed") == "no_recent_posts_available"

      return [ "latest_posts_not_analyzed", "Most recent 20 posts are not fully analyzed yet." ]
    end

    unless boolean.cast(preparation["ready_for_comment_generation"])
      return [
        preparation["reason_code"].to_s.presence || "profile_preparation_incomplete",
        preparation["reason"].to_s.presence || "Profile preparation is incomplete."
      ]
    end

    unless boolean.cast(face_verification["confirmed"])
      return [
        face_verification["reason_code"].to_s.presence || "face_verification_incomplete",
        face_verification["reason"].to_s.presence || "Face verification is incomplete."
      ]
    end

    [ "history_build_in_progress", "History build is still in progress." ]
  end
  # Summarizes the profile's messaging situation for the history state.
  #
  # Reads the most recent message strategy's opener templates (max 8), the four most
  # recent incoming messages and the outgoing message count, then derives which
  # conversation actions are allowed.
  #
  # Message bodies are truncated to 220 bytes. Because a byte cut can land inside a
  # multi-byte character (emoji, accented letters), the truncated text is scrubbed so
  # the stored state never contains invalid UTF-8.
  #
  # @param ready [Object] readiness flag, cast to a boolean
  # @return [Hash] string-keyed conversation state; default_conversation_state(ready: false)
  #   when any StandardError is raised
  def build_conversation_state(ready:)
    strategy = @profile.instagram_profile_message_strategies.recent_first.first
    openers = normalize_strings(strategy&.opener_templates).first(8)

    incoming_rows = @profile.instagram_messages
      .where(direction: "incoming")
      .recent_first
      .limit(4)
      .pluck(:body, :created_at)
      .map do |body, created_at|
        {
          "body" => body.to_s.byteslice(0, 220).scrub(""),
          "created_at" => created_at&.iso8601
        }
      end

    has_incoming = incoming_rows.any?
    outgoing_count = @profile.instagram_messages.where(direction: "outgoing").count
    dm_allowed = @profile.dm_allowed?
    ready_bool = ActiveModel::Type::Boolean.new.cast(ready)

    {
      "can_generate_initial_message" => ready_bool && dm_allowed && !has_incoming && outgoing_count.zero?,
      "can_respond_to_existing_messages" => ready_bool && dm_allowed && has_incoming,
      "continue_natural_interaction" => ready_bool && dm_allowed,
      "dm_allowed" => dm_allowed,
      "has_incoming_messages" => has_incoming,
      "outgoing_message_count" => outgoing_count,
      "suggested_openers" => openers,
      "recent_incoming_messages" => incoming_rows
    }
  rescue StandardError
    default_conversation_state(ready: false)
  end
  # Stores the history-build state on the profile's behavior record and returns the
  # result hash handed back to the caller.
  #
  # The state is written under metadata["history_build"]; metadata["history_ready"]
  # mirrors the readiness flag and metadata["history_ready_at"] is stamped only when
  # ready. Persistence is best-effort: when saving fails the caller still receives a
  # result built from the arguments, and the failure is now logged instead of being
  # swallowed silently.
  #
  # @return [Hash] :status, :ready, :reason_code, :reason,
  #   :retryable_profile_incomplete and :history_state
  def persist_and_result!(status:, ready:, reason_code:, reason:, checks:, queue_state:, preparation:, face_verification:, conversation:)
    boolean = ActiveModel::Type::Boolean.new
    ready_bool = boolean.cast(ready)
    state = {
      "status" => status.to_s,
      "ready" => ready_bool,
      "reason_code" => reason_code.to_s.presence || (ready_bool ? "history_ready" : "history_build_in_progress"),
      "reason" => reason.to_s.presence || (ready_bool ? "History Ready" : "History build in progress."),
      "updated_at" => Time.current.iso8601(3),
      "checks" => checks,
      "queue" => queue_state,
      "history_analysis" => {
        "ready_for_comment_generation" => boolean.cast(preparation["ready_for_comment_generation"]),
        "reason_code" => preparation["reason_code"].to_s.presence,
        "reason" => preparation["reason"].to_s.presence
      }.compact,
      "face_verification" => face_verification,
      "conversation" => conversation
    }

    behavior = InstagramProfileBehaviorProfile.find_or_initialize_by(instagram_profile: @profile)
    metadata = behavior.metadata.is_a?(Hash) ? behavior.metadata.deep_dup : {}
    metadata["history_build"] = state
    metadata["history_ready"] = ready_bool
    metadata["history_ready_at"] = Time.current.iso8601(3) if ready_bool
    behavior.metadata = metadata
    # Normalize fields that may be nil on a freshly initialized record.
    behavior.activity_score = behavior.activity_score.to_f
    behavior.behavioral_summary = {} unless behavior.behavioral_summary.is_a?(Hash)
    behavior.save!

    {
      status: status.to_s,
      ready: ready_bool,
      reason_code: state["reason_code"],
      reason: state["reason"],
      retryable_profile_incomplete: PROFILE_INCOMPLETE_REASON_CODES.include?(state["reason_code"].to_s),
      history_state: state
    }
  rescue StandardError => e
    # Still best-effort, but leave a trace so a state that never gets stored is diagnosable.
    Rails.logger&.warn(
      "[#{self.class.name}] failed to persist history build state for profile_id=#{@profile&.id}: #{e.class}: #{e.message}"
    )
    {
      status: status.to_s,
      ready: ready_bool,
      reason_code: reason_code.to_s.presence || "history_state_persist_failed",
      reason: reason.to_s.presence || "Unable to persist history build state.",
      retryable_profile_incomplete: PROFILE_INCOMPLETE_REASON_CODES.include?(reason_code.to_s),
      history_state: {
        "status" => status.to_s,
        "ready" => ready_bool,
        "reason_code" => reason_code.to_s,
        "reason" => reason.to_s
      }
    }
  end
  # Converts the string-keyed "refresh_queue" section of a face verification result
  # into the symbol-keyed shape consumed by build_queue_state.
  #
  # @param face_verification [Hash, Object] face verification result
  # @return [Hash] :queued_count, :pending_count, :deferred_count (integers) and
  #   :failures (at most 20 entries); all-zero/empty when the input is unusable
  def face_refresh_queue_state(face_verification:)
    source = face_verification.is_a?(Hash) ? face_verification["refresh_queue"] : nil
    source = {} unless source.is_a?(Hash)

    state = %w[queued_count pending_count deferred_count].to_h do |key|
      [ key.to_sym, source[key].to_i ]
    end
    state[:failures] = Array(source["failures"]).first(20)
    state
  rescue StandardError
    { queued_count: 0, pending_count: 0, deferred_count: 0, failures: [] }
  end
  # Tells whether the post's face data should be refreshed.
  #
  # A refresh is required when metadata["face_recognition"]["updated_at"] is missing
  # or unparseable, when it is older than FACE_RECENCY_REFRESH_DAYS days, or when the
  # post has no face records. Any error while checking also counts as "required".
  #
  # @param post [Object] post exposing #metadata and #instagram_post_faces
  # @return [Boolean]
  def face_refresh_required?(post:)
    recognition = post.metadata.is_a?(Hash) ? post.metadata["face_recognition"] : nil
    recognition = {} unless recognition.is_a?(Hash)

    refreshed_at = parse_time(recognition["updated_at"])
    return true if refreshed_at.nil?
    return true if refreshed_at < FACE_RECENCY_REFRESH_DAYS.days.ago

    post.instagram_post_faces.none?
  rescue StandardError
    true
  end
  # Tells whether a face refresh for the post is already queued or running.
  #
  # The refresh state recorded under the post's history-build metadata must have
  # status "queued" or "running" and a started_at/queued_at timestamp within the
  # last FACE_REFRESH_PENDING_WINDOW_HOURS hours. Errors are treated as "not in flight".
  #
  # @param post [Object] post whose metadata is inspected
  # @return [Boolean]
  def face_refresh_in_flight?(post:)
    refresh_state = history_build_face_refresh_state(post: post)
    return false unless %w[queued running].include?(refresh_state["status"].to_s)

    reference_time = parse_time(refresh_state["started_at"]) || parse_time(refresh_state["queued_at"])
    return false if reference_time.nil?

    reference_time >= FACE_REFRESH_PENDING_WINDOW_HOURS.hours.ago
  rescue StandardError
    false
  end
  # Enqueues a face identity refresh job for the post and records the queued state
  # in the post's history-build metadata.
  #
  # ActiveJob's perform_later returns false when the enqueue did not succeed. That
  # case is now reported explicitly as "EnqueueFailed" instead of surfacing through a
  # rescued NoMethodError on the false value.
  #
  # @param post [Object] post to refresh
  # @return [Hash] { queued: true, job_id:, queue_name: } on success, otherwise
  #   { queued: false, error_class:, error_message: }
  def enqueue_face_refresh_for_post(post:)
    if face_refresh_in_flight?(post: post)
      return { queued: false, error_class: "AlreadyQueued", error_message: "Face refresh already in flight." }
    end

    job = RefreshProfilePostFaceIdentityJob.perform_later(
      instagram_account_id: @account.id,
      instagram_profile_id: @profile.id,
      instagram_profile_post_id: post.id,
      trigger_source: "profile_history_build"
    )
    unless job
      return { queued: false, error_class: "EnqueueFailed", error_message: "Face refresh job was not enqueued." }
    end

    mark_history_build_metadata!(
      post: post,
      attributes: {
        "face_refresh" => {
          "status" => "queued",
          "job_id" => job.job_id,
          "queue_name" => job.queue_name,
          "queued_at" => Time.current.iso8601(3),
          "requested_by" => self.class.name
        }
      }
    )

    { queued: true, job_id: job.job_id, queue_name: job.queue_name }
  rescue StandardError => e
    {
      queued: false,
      error_class: e.class.name,
      error_message: e.message.to_s
    }
  end
  # Reads metadata["history_build"]["face_refresh"] from the post.
  #
  # @param post [Object] post exposing #metadata
  # @return [Hash] the stored refresh state, or {} when any level is missing, is not
  #   a Hash, or reading it raises
  def history_build_face_refresh_state(post:)
    node = post.metadata
    %w[history_build face_refresh].each do |key|
      node = node.is_a?(Hash) ? node[key] : nil
    end
    node.is_a?(Hash) ? node : {}
  rescue StandardError
    {}
  end
  # Asks the face identity resolution service to resolve identities for the post,
  # passing the usernames extracted from it and its analysis hash as content summary.
  #
  # @param post [Object] post exposing #analysis
  # @return [Object, nil] the service's result, or nil when any StandardError is raised
  def resolve_identity_for_post!(post:)
    usernames = extracted_usernames_for_post(post)
    summary = post.analysis.is_a?(Hash) ? post.analysis : {}

    @face_identity_resolution_service.resolve_for_post!(
      post: post,
      extracted_usernames: usernames,
      content_summary: summary
    )
  rescue StandardError
    nil
  end
  # Gathers candidate usernames for a post from its analysis ("mentions",
  # "profile_handles"), and from @handles found in the caption and OCR text.
  #
  # @param post [Object] post exposing #analysis and #caption
  # @return [Array<String>] normalized, de-duplicated usernames (at most 24)
  def extracted_usernames_for_post(post)
    analysis = post.analysis.is_a?(Hash) ? post.analysis : {}
    handle_pattern = /@[a-zA-Z0-9._]{2,30}/

    candidates =
      Array(analysis["mentions"]) +
      Array(analysis["profile_handles"]) +
      post.caption.to_s.scan(handle_pattern) +
      analysis["ocr_text"].to_s.scan(handle_pattern)

    candidates
      .map { |candidate| normalize_username(candidate) }
      .reject { |name| name.blank? }
      .uniq
      .first(24)
  end
  # Returns the normalized usernames stored under metadata["linked_usernames"] for a person.
  #
  # @param person [Object, nil] person exposing #metadata; nil is tolerated
  # @return [Array<String>] unique, non-blank normalized usernames
  def linked_usernames_for(person)
    metadata = person&.metadata
    raw_names = metadata.is_a?(Hash) ? metadata["linked_usernames"] : nil

    normalized = normalize_strings(raw_names).map { |name| normalize_username(name) }
    normalized.reject { |name| name.blank? }.uniq
  end
  # Coerces a value into an array of stripped, non-blank strings.
  #
  # @param value [Object] scalar, array or nil
  # @return [Array<String>]
  def normalize_strings(value)
    Array(value).each_with_object([]) do |row, kept|
      text = row.to_s.strip
      kept << text unless text.blank?
    end
  end
  # Merges the given attributes into the post's metadata["history_build"] hash and
  # stamps "updated_at", all inside post.with_lock. Failures are swallowed: the
  # method returns nil when anything raises.
  #
  # @param post [Object] post exposing #with_lock, #metadata and #update!
  # @param attributes [#to_h] entries to merge into the history-build state
  # @return [Object, nil]
  def mark_history_build_metadata!(post:, attributes:)
    post.with_lock do
      current = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
      previous_state = current["history_build"].is_a?(Hash) ? current["history_build"].deep_dup : {}

      current["history_build"] = previous_state
        .merge(attributes.to_h)
        .merge("updated_at" => Time.current.iso8601(3))

      post.update!(metadata: current)
    end
  rescue StandardError
    nil
  end
  # A post counts as analyzed when its ai_status is "analyzed" and analyzed_at is set.
  #
  # @param post [Object] post exposing #ai_status and #analyzed_at
  # @return [Boolean]
  def post_analyzed?(post)
    return false unless post.ai_status.to_s == "analyzed"

    post.analyzed_at.present?
  end
  # Tells whether analysis for the post is already underway: either its ai_status is
  # "pending"/"running", or metadata["ai_pipeline"]["status"] is "running".
  # Errors are treated as "not in flight".
  #
  # @param post [Object] post exposing #ai_status and #metadata
  # @return [Boolean]
  def post_analysis_in_flight?(post)
    return true if %w[pending running].include?(post.ai_status.to_s)

    pipeline = post.metadata.is_a?(Hash) ? post.metadata["ai_pipeline"] : nil
    pipeline.is_a?(Hash) && pipeline["status"].to_s == "running"
  rescue StandardError
    false
  end
  # Tells whether a media download for the post is still considered queued:
  # metadata["download_status"] must be "queued" and metadata["download_queued_at"]
  # must parse to a time within the last 8 hours. Errors are treated as "not in flight".
  #
  # @param post [Object] post exposing #metadata
  # @return [Boolean]
  def media_download_in_flight?(post)
    metadata = post.metadata.is_a?(Hash) ? post.metadata : {}
    queued_at = parse_time(metadata["download_queued_at"])

    return false unless metadata["download_status"].to_s == "queued"
    return false if queued_at.nil?

    queued_at > 8.hours.ago
  rescue StandardError
    false
  end
  # A post is downloadable when it is not flagged as deleted and has a media URL:
  # either source_media_url, or metadata "media_url_video" / "media_url_image".
  #
  # @param post [Object] post exposing #source_media_url and #metadata
  # @return [Boolean]
  def downloadable_post?(post)
    return false if deleted_post?(post)
    return true if post.source_media_url.to_s.strip.present?

    metadata = post.metadata.is_a?(Hash) ? post.metadata : {}
    %w[media_url_video media_url_image].any? { |key| metadata[key].to_s.strip.present? }
  end
  # Reads metadata["deleted_from_source"] and casts it to a boolean.
  #
  # @param post [Object] post exposing #metadata
  # @return [Boolean, nil] the cast flag (nil when the flag is absent)
  def deleted_post?(post)
    flag = post.metadata.is_a?(Hash) ? post.metadata["deleted_from_source"] : nil
    ActiveModel::Type::Boolean.new.cast(flag)
  end
  # Scope of the profile's posts whose metadata does not flag them as deleted from source.
  #
  # @return [Object] the relation returned by instagram_profile_posts.where
  def active_posts_scope
    not_deleted_sql = "COALESCE(metadata ->> 'deleted_from_source', 'false') <> 'true'"
    @profile.instagram_profile_posts.where(not_deleted_sql)
  end
  # Normalizes a username: stripped, lower-cased, one leading "@" removed.
  #
  # @param value [Object] raw username
  # @return [String, nil] normalized username, or nil when nothing is left
  def normalize_username(value)
    value.to_s.strip.downcase.delete_prefix("@").presence
  end
  # Parses a timestamp with Time.zone.parse.
  #
  # @param value [Object] raw timestamp
  # @return [Object, nil] the parsed time, or nil for blank input or when parsing raises
  def parse_time(value)
    raw = value.to_s
    return nil if raw.blank?

    Time.zone.parse(raw)
  rescue StandardError
    nil
  end
  # Placeholder checks used before a history build has produced real ones:
  # every check is not ready with reason code "not_started".
  #
  # @return [Hash] fresh string-keyed hash on every call
  def default_checks
    %w[all_posts_captured latest_50_captured latest_20_analyzed].to_h do |check_name|
      [ check_name, { "ready" => false, "reason_code" => "not_started" } ]
    end
  end
  # Empty queue state: every counter is 0 and every failure list is empty.
  #
  # @return [Hash] fresh string-keyed hash on every call
  def default_queue_state
    ordered_keys = %w[
      downloads_queued downloads_pending downloads_skipped analysis_failures
      analyses_queued analyses_pending analyses_skipped analysis_queue_failures
      face_refresh_queued face_refresh_pending face_refresh_deferred face_refresh_failures
    ]

    # Keys ending in "failures" hold lists; all others are counters.
    ordered_keys.to_h { |key| [ key, key.end_with?("failures") ? [] : 0 ] }
  end
  # Face verification placeholder used when verification has not run:
  # unconfirmed, zeroed counters and an empty refresh queue.
  #
  # @return [Hash] fresh string-keyed hash on every call
  def default_face_verification
    empty_refresh_queue = %w[queued_count pending_count deferred_count].to_h { |key| [ key, 0 ] }
    empty_refresh_queue["failures"] = []

    {
      "confirmed" => false,
      "reason_code" => "not_started",
      "reason" => "Face verification has not started.",
      "total_faces" => 0,
      "reference_face_count" => 0,
      "dominance_ratio" => 0.0,
      "combined_faces" => [],
      "refresh_queue" => empty_refresh_queue
    }
  end
  # Conversation placeholder: no messaging actions allowed and no message data.
  # "continue_natural_interaction" mirrors the (boolean-cast) ready flag.
  #
  # @param ready [Object] readiness flag, cast to a boolean
  # @return [Hash] fresh string-keyed hash on every call
  def default_conversation_state(ready:)
    continue_interaction = ActiveModel::Type::Boolean.new.cast(ready)

    state = {}
    state["can_generate_initial_message"] = false
    state["can_respond_to_existing_messages"] = false
    state["continue_natural_interaction"] = continue_interaction
    state["dm_allowed"] = false
    state["has_incoming_messages"] = false
    state["outgoing_message_count"] = 0
    state["suggested_openers"] = []
    state["recent_incoming_messages"] = []
    state
  end
  # Collector stand-in that serves posts which are already loaded, so a service
  # expecting a collector can be run without fetching anything.
  class ExistingPostsCollector
    # @param posts [Array, Object, nil] posts to hand back from collect_and_persist!
    def initialize(posts:)
      @posts = posts
    end

    # Accepts and ignores any collector options.
    #
    # @return [Hash] { posts: Array } wrapping the posts given at construction
    def collect_and_persist!(**_ignored_options)
      loaded_posts = Array(@posts)
      { posts: loaded_posts }
    end
  end
  781. end
  782. end

app/services/ai/profile_history_narrative_builder.rb

0.0% lines covered

100.0% branches covered

158 relevant lines. 0 lines covered and 158 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. module Ai
  2. class ProfileHistoryNarrativeBuilder
  # Presumably the word budget for one narrative chunk — the code that enforces it
  # is outside this excerpt; TODO confirm.
  CHUNK_WORD_LIMIT = 500

  # Event kinds that append_event! records in the narrative; all others are ignored.
  INTERESTING_EVENT_KINDS = [
    "story_uploaded",
    "story_viewed",
    "story_downloaded",
    "story_analyzed",
    "story_reply_sent",
    "story_reply_skipped",
    "story_ad_skipped",
    "story_video_skipped",
    "story_sync_failed",
    "feed_post_image_downloaded",
    "feed_post_comment_posted",
    "post_comment_sent",
    "profile_details_refreshed",
    "avatar_downloaded"
  ].freeze
  # Convenience entry point: builds a builder for the event and appends it.
  #
  # @param event [Object] profile event to record
  # @return [Object, nil] whatever the instance-level append_event! returns
  def self.append_event!(event)
    builder = new(event: event)
    builder.append_event!
  end
  # Convenience entry point: builds a builder for the event and appends the given
  # story intelligence to the narrative.
  #
  # @param event [Object] profile event the intelligence belongs to
  # @param intelligence [Hash, Object] story intelligence payload
  # @return [Object, nil] whatever the instance-level append_story_intelligence! returns
  def self.append_story_intelligence!(event, intelligence:)
    builder = new(event: event)
    builder.append_story_intelligence!(intelligence: intelligence)
  end
  # Captures the event together with its profile and that profile's account.
  #
  # @param event [Object] event exposing #instagram_profile
  def initialize(event:)
    @event = event
    @profile = event.instagram_profile
    # The profile may be missing; in that case there is no account either.
    @account = @profile.nil? ? nil : @profile.instagram_account
  end
  # Appends a one-line summary of the event to the profile's current narrative chunk.
  #
  # Nothing happens when the profile or account is missing, when the event kind is
  # not in INTERESTING_EVENT_KINDS, or when the summary is blank. Failures are logged
  # as warnings and nil is returned.
  #
  # @return [Object, nil]
  def append_event!
    return if !@profile || !@account
    return unless INTERESTING_EVENT_KINDS.include?(@event.kind.to_s)

    line = summarize_event(@event)
    return if line.blank?

    occurred = @event.occurred_at || @event.detected_at || Time.current
    with_profile_lock do
      chunk = current_or_new_chunk!(entry: line, timestamp: occurred)
      merged = [chunk.content.to_s, line].reject(&:blank?).join("\n")

      chunk.update!(
        content: merged,
        word_count: words_in(merged),
        entry_count: chunk.entry_count.to_i + 1,
        starts_at: chunk.starts_at || occurred,
        ends_at: occurred
      )
    end
  rescue StandardError => e
    Rails.logger.warn("[Ai::ProfileHistoryNarrativeBuilder] failed for profile_id=#{@profile&.id}: #{e.class}: #{e.message}")
    nil
  end
  # Appends a summary of the story intelligence to the profile's current narrative chunk.
  #
  # Nothing happens when the profile or account is missing or when the summary is
  # blank. Failures are logged as warnings and nil is returned.
  #
  # @param intelligence [Hash, Object] story intelligence payload to summarize
  # @return [Object, nil]
  def append_story_intelligence!(intelligence:)
    return if !@profile || !@account

    line = summarize_story_intelligence(intelligence)
    return if line.blank?

    occurred = @event.occurred_at || @event.detected_at || Time.current
    with_profile_lock do
      chunk = current_or_new_chunk!(entry: line, timestamp: occurred)
      merged = [chunk.content.to_s, line].reject(&:blank?).join("\n")

      chunk.update!(
        content: merged,
        word_count: words_in(merged),
        entry_count: chunk.entry_count.to_i + 1,
        starts_at: chunk.starts_at || occurred,
        ends_at: occurred
      )
    end
  rescue StandardError => e
    Rails.logger.warn("[Ai::ProfileHistoryNarrativeBuilder] intelligence append failed for profile_id=#{@profile&.id}: #{e.class}: #{e.message}")
    nil
  end
  74. private
  # Renders an event as a single narrative line:
  #   "[YYYY-MM-DD HH:MM] <title> - key=value | key=value ..."
  #
  # Details are taken from the event's metadata in a fixed order; only present
  # values are included. The line is capped at 900 bytes.
  #
  # Changes from the previous version:
  # * the 900-byte cut is scrubbed, so a cut that lands inside a multi-byte
  #   character no longer leaves invalid UTF-8 in the stored narrative;
  # * for "comment" and "reason", a blank primary value no longer hides a present
  #   fallback value (ai_reply_text -> comment_text, reason -> skip_reason).
  #
  # @param event [Object] event exposing #metadata, #occurred_at, #detected_at, #kind
  # @return [String]
  def summarize_event(event)
    metadata = event.metadata.is_a?(Hash) ? event.metadata : {}
    timestamp = (event.occurred_at || event.detected_at || Time.current).in_time_zone.strftime("%Y-%m-%d %H:%M")
    base = "[#{timestamp}] #{human_event_title(event.kind)}"
    details = []

    # Value copied verbatim.
    add_raw = lambda do |label, key|
      value = metadata[key]
      details << "#{label}=#{value}" if value.to_s.present?
    end
    # First present value among the keys, passed through normalize_text.
    add_text = lambda do |label, *keys|
      value = keys.map { |key| metadata[key] }.find { |candidate| candidate.to_s.present? }
      details << "#{label}=#{normalize_text(value)}" unless value.nil?
    end
    # Comma-joined list, limited to the first `limit` entries.
    add_list = lambda do |label, key, limit|
      values = Array(metadata[key])
      details << "#{label}=#{values.first(limit).join(',')}" if values.any?
    end

    add_raw.call("story_id", "story_id")
    add_raw.call("media", "media_type")
    add_raw.call("location", "location")
    add_raw.call("event", "event")
    add_text.call("description", "ai_image_description")
    add_text.call("caption", "caption")
    add_text.call("comment", "ai_reply_text", "comment_text")
    add_text.call("reason", "reason", "skip_reason")
    add_raw.call("url", "story_url")
    add_raw.call("permalink", "permalink")
    add_list.call("topics", "topics", 8)
    add_list.call("objects", "content_signals", 8)
    add_list.call("hashtags", "hashtags", 8)
    add_list.call("mentions", "mentions", 6)
    add_text.call("ocr", "ocr_text")
    add_text.call("transcript", "transcript")

    line = [base, details.join(" | ")].reject(&:blank?).join(" - ")
    line.byteslice(0, 900).scrub("")
  end
  # Builds a single narrative line from extracted story intelligence:
  # "[YYYY-MM-DD HH:MM] Story Intelligence Extracted - key=value | ...".
  #
  # Every field is looked up under its symbol key first and its string key
  # second, so payloads that went through JSON serialization are summarized
  # the same way as in-memory hashes. The line is capped at 900 bytes and a
  # multibyte character split by the cap is dropped.
  #
  # @param intelligence [Hash, Object] non-Hash values are treated as empty
  # @return [String, nil] nil when there is nothing to report
  def summarize_story_intelligence(intelligence)
    data = intelligence.is_a?(Hash) ? intelligence : {}
    read = ->(key) { data[key] || data[key.to_s] }
    ts = (@event.occurred_at || @event.detected_at || Time.current).in_time_zone.strftime("%Y-%m-%d %H:%M")
    generation_policy = [data[:generation_policy], data["generation_policy"]].find { |candidate| candidate.is_a?(Hash) } || {}

    details = []
    add_list = lambda do |label, key, limit|
      values = Array(read.call(key))
      details << "#{label}=#{values.first(limit).join(',')}" if values.any?
    end
    add_scalar = lambda do |label, key|
      value = read.call(key)
      details << "#{label}=#{value}" if value.to_s.present?
    end
    add_text = lambda do |label, key|
      value = read.call(key)
      details << "#{label}=#{normalize_text(value)}" if value.to_s.present?
    end

    add_list.call("topic", :topics, 8)
    add_list.call("objects", :objects, 8)
    add_list.call("hashtags", :hashtags, 8)
    add_list.call("mentions", :mentions, 6)
    add_list.call("handles", :profile_handles, 6)
    add_list.call("detected_users", :detected_usernames, 6)
    add_list.call("source_refs", :source_profile_references, 4)
    add_scalar.call("share", :share_status)

    scenes = Array(read.call(:scenes))
    if scenes.any?
      scene_types = scenes.first(6).map { |row| row.is_a?(Hash) ? row[:type] || row['type'] : row }
      details << "scenes=#{scene_types.join(',')}"
    end

    add_text.call("ocr", :ocr_text)
    add_text.call("transcript", :transcript)
    add_text.call("description", :description)

    face_count = read.call(:face_count).to_i
    details << "faces=#{face_count}" if face_count.positive?

    add_scalar.call("ownership", :ownership_classification)
    add_scalar.call("ownership_conf", :ownership_confidence)
    add_list.call("ownership_reason", :ownership_reason_codes, 6)

    # key? (not truthiness) so an explicit allow_comment: false is still reported.
    allow_key = [:allow_comment, "allow_comment"].find { |candidate| generation_policy.key?(candidate) }
    unless allow_key.nil?
      reason_code = generation_policy[:reason_code] || generation_policy["reason_code"]
      details << "policy=#{generation_policy[allow_key] ? 'allow' : 'skip'}:#{reason_code}"
    end

    return nil if details.empty?

    line = "[#{ts}] Story Intelligence Extracted - #{details.join(' | ')}"
    # byteslice may cut through a multibyte character; scrub removes the fragment.
    line.byteslice(0, 900).scrub("")
  end
  # Turns an event kind such as "story_reply_sent" into "Story Reply Sent".
  #
  # @param kind [#to_s]
  # @return [String]
  def human_event_title(kind)
    words = kind.to_s.tr("_", " ").split
    titled = words.map { |word| word.capitalize }
    titled.join(" ")
  end
  # Collapses all whitespace runs to single spaces, trims the ends and caps
  # the text at 220 bytes.
  #
  # The cap is byte-based, so it can land inside a multibyte character; the
  # dangling fragment is removed so the result is always validly encoded.
  #
  # @param value [#to_s]
  # @return [String]
  def normalize_text(value)
    value.to_s.gsub(/\s+/, " ").strip.byteslice(0, 220).scrub("")
  end
  # Returns the chunk that +entry+ should be appended to.
  #
  # The most recent chunk is reused while its word count plus the entry's
  # word count stays within CHUNK_WORD_LIMIT; otherwise a new chunk with the
  # next sequence number is created. The first chunk gets sequence 1.
  #
  # @param entry [String] text about to be appended
  # @param timestamp [Time] used as starts_at/ends_at for a new chunk
  # @return [Object] the chunk record to write to
  def current_or_new_chunk!(entry:, timestamp:)
    incoming_words = words_in(entry)
    latest = @profile.instagram_profile_history_chunks.recent_first.first
    return create_chunk!(sequence: 1, timestamp: timestamp) unless latest

    if latest.word_count.to_i + incoming_words <= CHUNK_WORD_LIMIT
      latest
    else
      create_chunk!(sequence: latest.sequence.to_i + 1, timestamp: timestamp)
    end
  end
  # Creates an empty history chunk for the profile.
  #
  # @param sequence [Integer] position of the chunk in the profile's history
  # @param timestamp [Time] initial starts_at and ends_at
  # @return [Object] the created chunk record
  def create_chunk!(sequence:, timestamp:)
    blank_chunk = {
      instagram_account: @account,
      sequence: sequence,
      content: "",
      word_count: 0,
      entry_count: 0,
      starts_at: timestamp,
      ends_at: timestamp,
      metadata: { source: "event_narrative_builder", chunk_word_limit: CHUNK_WORD_LIMIT }
    }
    @profile.instagram_profile_history_chunks.create!(blank_chunk)
  end
  # Counts the word-like tokens in +text+ (non-whitespace runs delimited by
  # word boundaries).
  #
  # @param text [#to_s]
  # @return [Integer]
  def words_in(text)
    tokens = text.to_s.scan(/\b[^\s]+\b/)
    tokens.size
  end
  # Runs the given block through @profile.with_lock and returns its result.
  def with_profile_lock(&critical_section)
    @profile.with_lock(&critical_section)
  end
  157. end
  158. end

app/services/ai/provider_registry.rb

0.0% lines covered

100.0% branches covered

48 relevant lines. 0 lines covered and 48 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module Ai
    # Maps provider keys to provider classes and keeps one AiProviderSetting
    # row per known provider.
    class ProviderRegistry
      PROVIDERS = {
        "local" => "Ai::Providers::LocalProvider"
      }.freeze

      class << self
        # @return [Array<String>] keys of every registered provider
        def provider_keys
          PROVIDERS.keys
        end

        # Creates a settings row, with default enabled/priority values, for
        # every registered provider that does not have one yet.
        def ensure_settings!
          provider_keys.each do |key|
            AiProviderSetting.find_or_create_by!(provider: key) do |record|
              record.enabled = default_enabled?(key)
              record.priority = default_priority(key)
            end
          end
        end

        # @return [Object] enabled settings ordered by priority, then provider key
        def enabled_settings
          ensure_settings!
          enabled = AiProviderSetting.where(provider: provider_keys, enabled: true)
          enabled.order(priority: :asc, provider: :asc)
        end

        # @return [Object] settings for all registered providers, via the
        #   model's enabled_first scope
        def all_settings
          ensure_settings!
          AiProviderSetting.where(provider: provider_keys).enabled_first
        end

        # Instantiates the provider registered under +provider_key+.
        #
        # @param provider_key [#to_s]
        # @param setting [Object, nil] looked up by provider key when omitted
        # @raise [RuntimeError] when the key is not registered
        def build_provider(provider_key, setting: nil)
          class_name = PROVIDERS[provider_key.to_s]
          raise "Unsupported AI provider: #{provider_key}" if class_name.blank?

          provider_class = class_name.constantize
          provider_class.new(setting: setting || AiProviderSetting.find_by(provider: provider_key))
        end

        private

        # Local provider is always available if services are running.
        def default_enabled?(provider)
          provider == "local"
        end

        # Local processing gets the highest priority (1); anything else sorts last.
        def default_priority(provider)
          provider == "local" ? 1 : 100
        end
      end
    end
  end

app/services/ai/providers/base_provider.rb

0.0% lines covered

100.0% branches covered

52 relevant lines. 0 lines covered and 52 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module Ai
    module Providers
      # Abstract base for AI providers. Subclasses override #key, the
      # supports_* predicates they implement, #test_key! and the analyze_*
      # methods.
      #
      # NOTE(review): NotImplementedError descends from ScriptError, so a
      # `rescue StandardError` in calling code will not catch it.
      class BaseProvider
        attr_reader :setting

        # @param setting [Object, nil] provider settings record
        def initialize(setting: nil)
          @setting = setting
        end

        # @return [String] registry key of the provider
        def key
          raise NotImplementedError
        end

        # @return [String] the setting's display name, else a humanized key
        def display_name
          setting&.display_name || key.to_s.humanize
        end

        def supports_profile?
          false
        end

        def supports_post_image?
          false
        end

        def supports_post_video?
          false
        end

        # A provider is available when its setting is enabled and, if it needs
        # an API key, one is present.
        def available?
          return false unless setting&.enabled == true
          !requires_api_key? || setting&.api_key_present?
        end

        def requires_api_key?
          true
        end

        def preferred_model
          effective_model
        end

        # Verifies connectivity/credentials; implemented by subclasses.
        def test_key!
          raise NotImplementedError
        end

        # Abstract. The keyword names match the concrete providers
        # (profile_payload:, media:), so calling an unimplemented provider
        # raises NotImplementedError rather than an ArgumentError about
        # unknown keywords. The earlier underscore-prefixed keywords are still
        # accepted through the catch-all.
        def analyze_profile!(profile_payload: nil, media: nil, **_legacy_keywords)
          raise NotImplementedError
        end

        # Abstract; see #analyze_profile! for the keyword naming rationale.
        def analyze_post!(post_payload: nil, media: nil, provider_options: {}, **_legacy_keywords)
          raise NotImplementedError
        end

        protected

        # @return [String] the effective API key
        # @raise [RuntimeError] when no key is configured
        def ensure_api_key!
          return setting.effective_api_key if setting&.effective_api_key.to_s.present?
          raise "Missing API key for #{display_name}"
        end

        def effective_model
          setting&.effective_model.to_s
        end
      end
    end
  end

app/services/ai/providers/local_provider.rb

0.0% lines covered

100.0% branches covered

689 relevant lines. 0 lines covered and 689 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. module Ai
  2. module Providers
  3. class LocalProvider < BaseProvider
  # @param setting [Object, nil] provider settings record, passed to the base class
  # @param video_frame_change_detector_service [#classify] injectable detector
  #   used when classifying videos
  def initialize(setting: nil, video_frame_change_detector_service: VideoFrameChangeDetectorService.new)
    super(setting: setting)

    @video_frame_change_detector_service = video_frame_change_detector_service
  end
  # Registry key for this provider.
  def key
    "local"
  end

  # Profile analysis is implemented by this provider.
  def supports_profile?
    true
  end

  # Image posts are implemented by this provider.
  def supports_post_image?
    true
  end

  # Video posts are implemented by this provider.
  def supports_post_video?
    true
  end

  # The local stack needs no API key.
  def requires_api_key?
    false
  end
  # Health-checks both local backends (the microservice and Ollama).
  #
  # @return [Hash] { ok: true, message:, microservice:, ollama: } when both
  #   are healthy, otherwise { ok: false, message: } listing each failing
  #   backend; any raised StandardError becomes { ok: false, message: }.
  def test_key!
    microservice = client.test_connection!
    ollama = ollama_client.test_connection!
    failing = { "Microservice" => microservice, "Ollama" => ollama }.reject { |_label, result| result[:ok] }

    if failing.empty?
      {
        ok: true,
        message: "Local AI services are healthy",
        microservice: microservice[:services],
        ollama: ollama[:models]
      }
    else
      problems = failing.map { |label, result| "#{label}: #{result[:message]}" }
      { ok: false, message: problems.join(" | ") }
    end
  rescue StandardError => error
    { ok: false, message: error.message.to_s }
  end
  # Rule-based profile analysis from bio/message text plus vision labels.
  #
  # @param profile_payload [Hash] reads :bio and :recent_outgoing_messages
  # @param media [Array<Hash>, Hash, nil] items with :url (http/https) or
  #   :bytes; a single Hash is treated as a one-item list
  # @return [Hash] { model:, prompt:, response_text:, response_raw:, analysis: }
  def analyze_profile!(profile_payload:, media: nil)
    # Array(hash) would explode a lone media Hash into [key, value] pairs and
    # every pair would then be skipped, so wrap it explicitly.
    media_items = media.is_a?(Hash) ? [ media ] : Array(media)

    image_labels = []
    media_items.each do |item|
      next unless item.is_a?(Hash)
      if item[:url].to_s.start_with?("http://", "https://")
        vision = client.analyze_image_uri!(item[:url], features: image_features)
        image_labels.concat(extract_image_labels(vision))
      elsif item[:bytes].present?
        vision = client.analyze_image_bytes!(item[:bytes], features: image_features)
        image_labels.concat(extract_image_labels(vision))
      end
    rescue StandardError => e
      # Best-effort: record the failure as a marker label and keep going.
      image_labels << "image_analysis_error:#{e.class.name}"
    end

    # Error markers stay in response_raw for diagnostics but must not surface
    # as interests, conversation hooks or engagement evidence.
    visual_labels = meaningful_visual_labels(image_labels)

    bio = profile_payload[:bio].to_s
    recent_messages = Array(profile_payload[:recent_outgoing_messages]).map { |m| m[:body].to_s }.join(" ")
    combined = [ bio, recent_messages ].join(" ").downcase
    demo = infer_demographic_estimates(text: combined, bio: bio, labels: image_labels)
    languages = []
    languages << { language: "english", confidence: 0.7, evidence: "ASCII text in bio/messages" } if combined.match?(/[a-z]{3,}/)

    analysis = {
      "summary" => "Rule-based local AI analysis from profile text and vision labels.",
      "languages" => languages,
      "likes" => visual_labels.first(10),
      "dislikes" => [],
      "intent_labels" => [ "unknown" ],
      "conversation_hooks" => visual_labels.first(3).map { |label| { "hook" => "Ask about #{label}", "evidence" => "vision_label:#{label}" } },
      "personalization_tokens" => visual_labels.first(5),
      "no_go_zones" => [],
      "writing_style" => {
        "tone" => infer_tone(combined),
        "formality" => infer_formality(combined),
        "emoji_usage" => combined.match?(/[^\x00-\x7F]/) ? "present" : "low",
        "slang_level" => infer_slang(combined),
        "evidence" => "Derived from bio + latest outgoing messages."
      },
      "response_style_prediction" => "unknown",
      "engagement_probability" => visual_labels.any? ? 0.55 : 0.35,
      "recommended_next_action" => visual_labels.any? ? "comment" : "review",
      "demographic_estimates" => {
        "age" => demo[:age],
        "age_confidence" => demo[:age_confidence],
        "gender" => demo[:gender],
        "gender_confidence" => demo[:gender_confidence],
        "location" => demo[:location],
        "location_confidence" => demo[:location_confidence],
        "evidence" => demo[:evidence]
      },
      "self_declared" => {
        "age" => extract_age(bio),
        "gender" => nil,
        "location" => nil,
        "pronouns" => extract_pronouns(bio),
        "other" => nil
      },
      "suggested_dm_openers" => [
        "Your recent posts are a vibe, what are you into most these days? ✨",
        "Okay your content style is low-key fire, what inspired it? 🔥",
        "Your feed feels super intentional, got any creator recs?",
        "Not gonna lie, your profile energy is elite. What do you like posting most?",
        "Your page is giving main-character energy, what are you building next? 👀"
      ],
      "suggested_comment_templates" => [
        "This is such a vibe 🔥",
        "Okay this ate, love this one 👏",
        "Clean shot, super satisfying fr",
        "This goes hard, great share ✨",
        "Big fan of this style, keep it coming 🙌"
      ],
      "confidence_notes" => "Built with local AI models to minimize cost; output is conservative and evidence-driven.",
      "why_not_confident" => "Limited structured text/bio and limited image context."
    }

    {
      model: "local-ai-vision+rules",
      prompt: {
        provider: key,
        image_count: media_items.length,
        rule_based: true
      },
      response_text: "local_ai_rule_based_analysis",
      response_raw: { image_labels: image_labels },
      analysis: analysis
    }
  end
  # Rule-based post analysis: runs image or video analysis (a static video is
  # analyzed as a single representative frame), derives relevance and
  # suggested actions from author tags, and optionally generates comments.
  #
  # @param post_payload [Hash] reads :author_profile/:tags and :rules
  # @param media [Hash, nil] reads :type ("image"/"video"), :bytes, :url, ...
  # @param provider_options [Hash] see normalize_provider_options
  # @return [Hash] { model:, prompt:, response_text:, response_raw:, analysis: }
  def analyze_post!(post_payload:, media: nil, provider_options: {})
    options = normalize_provider_options(provider_options)
    media_hash = media.is_a?(Hash) ? media : {}
    labels = []
    raw = {}
    image_description = nil

    case media_hash[:type].to_s
    when "image"
      vision, vision_warning = safe_media_analysis(stage: "image_analysis", media_type: "image") do
        analyze_image_media(media_hash, provider_options: options)
      end
      raw[:vision] = vision
      labels = extract_image_labels(vision)
      if vision_warning
        labels << warning_label_for_error(vision_warning[:error_class], prefix: "image_analysis_error")
        raw[:vision_warning] = vision_warning
      end
      labels = labels.uniq
      image_description =
        if labels.any?
          build_image_description_from_vision(vision, labels: labels)
        else
          "Image analysis unavailable."
        end
    when "video"
      mode = classify_video_processing(media_hash)
      raw[:video_processing] = (mode[:metadata].is_a?(Hash) ? mode[:metadata] : {}).merge(
        processing_mode: mode[:processing_mode].to_s,
        static: ActiveModel::Type::Boolean.new.cast(mode[:static]),
        duration_seconds: mode[:duration_seconds]
      ).compact
      if mode[:processing_mode].to_s == "static_image" && mode[:frame_bytes].present?
        # Static video: analyze the representative frame as an image.
        static_media = {
          type: "image",
          content_type: mode[:frame_content_type].to_s.presence || "image/jpeg",
          bytes: mode[:frame_bytes]
        }
        vision, vision_warning = safe_media_analysis(stage: "image_analysis", media_type: "image") do
          analyze_image_media(static_media, provider_options: options)
        end
        raw[:vision] = vision
        labels = extract_image_labels(vision)
        if vision_warning
          labels << warning_label_for_error(vision_warning[:error_class], prefix: "image_analysis_error")
          raw[:vision_warning] = vision_warning
        end
        labels = labels.uniq
        image_description =
          if labels.any?
            "Static video detected; analyzed representative frame. #{build_image_description_from_vision(vision, labels: labels)}".strip
          else
            "Static video detected, but frame analysis was unavailable."
          end
      else
        video, video_warning = safe_media_analysis(stage: "video_analysis", media_type: "video") do
          analyze_video_media(media_hash, provider_options: options)
        end
        raw[:video] = video
        labels = extract_video_labels(video)
        if video_warning
          labels << warning_label_for_error(video_warning[:error_class], prefix: "video_analysis_error")
          raw[:video_warning] = video_warning
        end
        labels = labels.uniq
        image_description =
          if labels.any?
            build_image_description_from_video(video, labels: labels)
          else
            "Video analysis unavailable."
          end
      end
    else
      labels = []
      image_description = "No image or video content available for visual description."
    end

    # Error marker labels are dropped here; a detected face counts as "person".
    visual_labels = meaningful_visual_labels(labels)
    detected_face_count = extract_face_count_from_raw(raw)
    if detected_face_count.positive? && !visual_labels.include?("person")
      visual_labels << "person"
    end
    visual_labels = visual_labels.uniq
    image_description = unavailable_visual_description(raw: raw, media_type: media_hash[:type]) if visual_labels.empty?

    author_tags = Array(post_payload.dig(:author_profile, :tags)).map(&:to_s)
    ignore_tags = Array(post_payload.dig(:rules, :ignore_if_tagged)).map(&:to_s)
    prefer_tags = Array(post_payload.dig(:rules, :prefer_interact_if_tagged)).map(&:to_s)
    author_type = infer_author_type(author_tags)
    ignored = !(author_tags & ignore_tags).empty?
    preferred = !(author_tags & prefer_tags).empty?
    relevant = if ignored
      false
    elsif preferred
      true
    else
      visual_labels.any?
    end
    actions = if ignored
      [ "ignore" ]
    elsif preferred
      [ "review", "like_suggestion", "comment_suggestion" ]
    else
      [ "review" ]
    end

    comment_generation =
      if !options[:include_comment_generation]
        comment_generation_disabled_result
      elsif visual_labels.any?
        generate_engagement_comments_with_fallback(
          post_payload: post_payload,
          image_description: image_description,
          labels: visual_labels,
          author_type: author_type
        )
      else
        skipped_comment_generation_for_missing_visuals(raw: raw, media_type: media_hash[:type])
      end

    # Prefer the generator's structured suggestions; otherwise try to recover
    # them from the raw JSON response. Best-effort by design, but the result
    # is always an array-like value, never nil.
    comment_suggestions = comment_generation[:comment_suggestions]
    comment_suggestions ||= begin
      raw_response = comment_generation.dig(:raw, :response)
      parsed = raw_response.is_a?(String) ? JSON.parse(raw_response) : nil
      parsed.is_a?(Hash) ? parsed["comment_suggestions"] : nil
    rescue StandardError
      nil
    end
    comment_suggestions ||= []

    {
      model: [ "local-ai-vision-video+rules", comment_generation[:model] ].compact.join("+"),
      prompt: {
        provider: key,
        media_type: media_hash[:type].to_s,
        rule_based: true,
        provider_options: options
      },
      response_text: "local_ai_rule_based_post_analysis",
      response_raw: raw.merge(
        comment_generation: {
          status: comment_generation[:status],
          source: comment_generation[:source],
          fallback_used: comment_generation[:fallback_used],
          model: comment_generation[:model],
          error_message: comment_generation[:error_message],
          raw: comment_generation[:raw]
        }
      ),
      analysis: {
        "image_description" => image_description,
        "relevant" => relevant,
        "author_type" => author_type,
        "topics" => visual_labels.first(12),
        "detected_face_count" => detected_face_count,
        "visual_signal_count" => visual_labels.length,
        "sentiment" => "unknown",
        "suggested_actions" => actions,
        "recommended_next_action" => actions.first || "review",
        "engagement_score" => visual_labels.any? ? 0.6 : 0.2,
        "comment_suggestions" => comment_suggestions,
        "comment_generation_status" => comment_generation[:status],
        "comment_generation_source" => comment_generation[:source],
        "comment_generation_fallback_used" => ActiveModel::Type::Boolean.new.cast(comment_generation[:fallback_used]),
        "comment_generation_error" => comment_generation[:error_message].to_s.presence,
        "personalization_tokens" => visual_labels.first(5),
        "video_processing_mode" => mode_for(media_hash: media_hash, raw: raw),
        "video_static_detected" => static_video_detected?(media_hash: media_hash, raw: raw),
        "confidence" => visual_labels.any? ? 0.65 : 0.2,
        "evidence" => visual_labels.any? ? "Local AI visual signals: #{visual_labels.first(6).join(', ')}" : "No verified visual signals detected; comment generation skipped"
      }
    }
  end
  286. private
  # Lazily built, memoized client for the local microservice.
  def client
    @client = Ai::LocalMicroserviceClient.new unless @client
    @client
  end

  # Lazily built, memoized Ollama client.
  def ollama_client
    @ollama_client = Ai::OllamaClient.new unless @ollama_client
    @ollama_client
  end
  # Builds the feature request list for image analysis.
  #
  # @param provider_options [Hash] raw or normalized provider options
  # @return [Array<Hash>] { type:, maxResults: } rows in LABEL, TEXT, FACE
  #   order; falls back to label detection alone if nothing was selected
  def image_features(provider_options = {})
    normalized = normalize_provider_options(provider_options)
    wanted = image_feature_types_for_options(options: normalized)
    result_limits = { "LABEL_DETECTION" => 15, "TEXT_DETECTION" => 10, "FACE_DETECTION" => 8 }

    features = result_limits
      .select { |type, _limit| wanted.include?(type) }
      .map { |type, limit| { type: type, maxResults: limit } }
    features.empty? ? [ { type: "LABEL_DETECTION", maxResults: 15 } ] : features
  end
  # Image feature types to request: labels always, OCR and faces when enabled.
  #
  # @param options [Hash] reads :include_ocr and :include_faces
  # @return [Array<String>]
  def image_feature_types_for_options(options:)
    [
      "LABEL_DETECTION",
      ("TEXT_DETECTION" if options[:include_ocr]),
      ("FACE_DETECTION" if options[:include_faces])
    ].compact.uniq
  end
  # Video feature types to request; empty when video analysis is disabled.
  #
  # @param options [Hash] reads :include_video_analysis and :include_faces
  # @return [Array<String>]
  def video_feature_types_for_options(options:)
    return [] unless options[:include_video_analysis]

    optional = options[:include_faces] ? [ "FACE_DETECTION" ] : []
    ([ "LABEL_DETECTION", "SHOT_CHANGE_DETECTION" ] + optional).uniq
  end
  # Normalizes caller-supplied provider options into a fixed set of flags.
  #
  # Each flag may be given under a symbol or string key. Faces and OCR default
  # to off when :visual_only is set and on otherwise; comment generation and
  # video analysis default to on. Every value in the result is a strict
  # true/false (an explicit false or an uncastable value is never nil).
  #
  # @param provider_options [Hash, Object] non-Hash values mean "no options"
  # @return [Hash] :visual_only, :include_faces, :include_ocr,
  #   :include_comment_generation, :include_video_analysis
  def normalize_provider_options(provider_options)
    raw = provider_options.is_a?(Hash) ? provider_options : {}
    boolean = ActiveModel::Type::Boolean.new

    # Explicit value when the caller supplied the flag under either key form,
    # otherwise the given default.
    flag = lambda do |name, default|
      next default unless raw.key?(name) || raw.key?(name.to_s)
      boolean.cast(raw[name] || raw[name.to_s]) ? true : false
    end

    visual_only = boolean.cast(raw[:visual_only] || raw["visual_only"]) ? true : false
    {
      visual_only: visual_only,
      include_faces: flag.call(:include_faces, !visual_only),
      include_ocr: flag.call(:include_ocr, !visual_only),
      include_comment_generation: flag.call(:include_comment_generation, true),
      include_video_analysis: flag.call(:include_video_analysis, true)
    }
  end
  # Result used when comment generation is switched off via provider options.
  #
  # @return [Hash] same shape as a comment generation result, with no suggestions
  def comment_generation_disabled_result
    no_suggestions = []
    {
      model: ollama_model,
      raw: {},
      source: "policy",
      status: "disabled_by_provider_options",
      fallback_used: false,
      error_message: nil,
      comment_suggestions: no_suggestions
    }
  end
  # Asks the frame-change detector how a video should be processed.
  #
  # Falls back to "dynamic_video" (with a reason in :metadata) when the bytes
  # are missing, the detector returns a non-Hash, or the detector raises.
  #
  # @param media [Hash] reads :bytes, :reference_id, :content_type
  # @return [Hash] the detector's result, or a fallback with
  #   :processing_mode and :metadata
  def classify_video_processing(media)
    video_bytes = media[:bytes]
    if video_bytes.blank?
      return {
        processing_mode: "dynamic_video",
        frame_bytes: nil,
        frame_content_type: nil,
        metadata: { reason: "video_bytes_missing" }
      }
    end

    outcome = @video_frame_change_detector_service.classify(
      video_bytes: video_bytes,
      reference_id: media[:reference_id].to_s.presence || "post_media",
      content_type: media[:content_type]
    )
    return outcome if outcome.is_a?(Hash)

    { processing_mode: "dynamic_video", metadata: { reason: "frame_change_detector_invalid_result" } }
  rescue StandardError => error
    failure = {
      reason: "frame_change_detection_failed",
      error_class: error.class.name,
      error_message: normalize_error_message(error.message)
    }
    {
      processing_mode: "dynamic_video",
      frame_bytes: nil,
      frame_content_type: nil,
      metadata: failure
    }
  end
  # Processing mode recorded for a video post; nil for non-video media.
  #
  # @return [String, nil] recorded mode, or "dynamic_video" when none was recorded
  def mode_for(media_hash:, raw:)
    return nil if media_hash[:type].to_s != "video"

    recorded = raw.dig(:video_processing, :processing_mode).to_s
    recorded.presence || "dynamic_video"
  end
  # True only for video media whose recorded processing mode is "static_image".
  def static_video_detected?(media_hash:, raw:)
    is_video = media_hash[:type].to_s == "video"
    is_video && raw.dig(:video_processing, :processing_mode).to_s == "static_image"
  end
  # Sends an image to the local vision service, preferring in-memory bytes
  # over an http(s) URL.
  #
  # String payloads are sent binary-encoded. The caller's string is never
  # re-tagged in place: a binary copy is made when needed, so shared or frozen
  # strings are safe to pass.
  #
  # @param media [Hash] reads :bytes and :url
  # @param provider_options [Hash] forwarded to image_features
  # @return [Object] the client's response, or {} when there is nothing to analyze
  def analyze_image_media(media, provider_options: {})
    payload = media[:bytes]
    if payload.present?
      payload = payload.b if payload.is_a?(String) && payload.encoding != Encoding::BINARY
      client.analyze_image_bytes!(payload, features: image_features(provider_options))
    elsif media[:url].to_s.start_with?("http://", "https://")
      client.analyze_image_uri!(media[:url], features: image_features(provider_options))
    else
      {}
    end
  end
  # Sends video bytes to the local video analysis service.
  #
  # @param media [Hash] reads :bytes
  # @param provider_options [Hash] controls which features are requested
  # @return [Object] the client's response, or an empty annotation stub when
  #   no video features are requested
  # @raise [RuntimeError] when features are requested but the bytes are blank
  def analyze_video_media(media, provider_options: {})
    normalized = normalize_provider_options(provider_options)
    requested = video_feature_types_for_options(options: normalized)
    if requested.empty?
      return { response: { annotationResults: [ {} ] } }
    end

    video_bytes = media[:bytes]
    raise "Video blob unavailable" if video_bytes.blank?

    client.analyze_video_bytes!(video_bytes, features: requested)
  end
  # Flattens a vision response into lowercase labels: every label
  # description, "person" when any face annotation exists, then at most the
  # first two text descriptions.
  #
  # A nil or non-Hash response and non-Hash annotation rows yield no labels
  # instead of raising, since callers invoke this outside their error-handling
  # wrapper.
  #
  # @param vision_response [Hash, nil] string-keyed annotation hash
  # @return [Array<String>] unique labels
  def extract_image_labels(vision_response)
    return [] unless vision_response.is_a?(Hash)

    descriptions = lambda do |annotation_key|
      Array(vision_response[annotation_key])
        .map { |row| (row.is_a?(Hash) ? row["description"] : nil).to_s.downcase.strip }
        .reject(&:blank?)
    end

    labels = descriptions.call("labelAnnotations")
    texts = descriptions.call("textAnnotations")
    labels << "person" unless Array(vision_response["faceAnnotations"]).empty?
    (labels + texts.first(2)).uniq
  end
  # Collects lowercase entity descriptions from the first annotation result's
  # segment and shot label annotations.
  #
  # A nil or non-Hash response, a missing annotation result and non-Hash rows
  # yield no labels instead of raising.
  #
  # @param video_response [Hash, nil] string-keyed video analysis response
  # @return [Array<String>] unique labels
  def extract_video_labels(video_response)
    return [] unless video_response.is_a?(Hash)

    annotation = video_response.dig("response", "annotationResults", 0)
    return [] unless annotation.is_a?(Hash)

    rows = Array(annotation["segmentLabelAnnotations"]) + Array(annotation["shotLabelAnnotations"])
    rows
      .map { |row| (row.is_a?(Hash) ? row.dig("entity", "description") : nil).to_s.downcase.strip }
      .reject(&:blank?)
      .uniq
  end
  # Maps author tags to an author type; the first matching rule wins, in the
  # order relative, friend, page, personal_user.
  #
  # @param tags [Array<String>]
  # @return [String] matched type, or "unknown"
  def infer_author_type(tags)
    ranked_rules = [
      [ "relative", [ "relative" ] ],
      [ "friend", [ "friend", "female_friend", "male_friend" ] ],
      [ "page", [ "page" ] ],
      [ "personal_user", [ "personal_user" ] ]
    ]
    matched = ranked_rules.find { |_type, aliases| aliases.any? { |tag| tags.include?(tag) } }
    matched ? matched.first : "unknown"
  end
  # Canned fallback comments used when model-based generation fails.
  #
  # The first suggestion mentions the leading label when there is one. With a
  # description but no labels a generic opener is used, rather than
  # interpolating a missing label into the sentence.
  #
  # @param labels [Array<String>, nil]
  # @param description [String, nil]
  # @return [Array<String>] five suggestions, or [] when there is neither a
  #   description nor a label
  def build_comment_suggestions(labels:, description:)
    anchor = Array(labels).first.to_s.strip.presence
    return [] if description.to_s.strip.blank? && anchor.nil?

    opener = anchor ? "Okay this #{anchor} is elite 🔥" : "Okay this is elite 🔥"
    [
      opener,
      "This whole vibe is so clean, love it ✨",
      "Not gonna lie this one ate 👏",
      "The energy here is immaculate fr 😮‍💨",
      "This is super engaging, big fan 🙌"
    ]
  end
  # Generates comment suggestions with the local LLM generator.
  #
  # @return [Hash] the generator's result, or, when that result carries an
  #   :error_message, an "error_fallback" result with canned suggestions
  def generate_engagement_comments(post_payload:, image_description:, labels:, author_type:)
    result = Ai::LocalEngagementCommentGenerator.new(
      ollama_client: ollama_client,
      model: ollama_model
    ).generate!(
      post_payload: post_payload,
      image_description: image_description.to_s,
      topics: labels.first(12),
      author_type: author_type,
      historical_comments: extract_historical_comments(post_payload),
      historical_context: extract_historical_context(post_payload)
    )

    if result[:error_message].present?
      {
        model: ollama_model,
        raw: {},
        source: "fallback",
        status: "error_fallback",
        fallback_used: true,
        error_message: result[:error_message],
        comment_suggestions: build_comment_suggestions(labels: labels, description: image_description)
      }
    else
      result
    end
  end
  # Wraps generate_engagement_comments so a raised error degrades to canned
  # suggestions instead of failing the whole post analysis.
  #
  # The start time is captured before the generation attempt so the failure
  # record reflects how long the attempt ran before raising.
  #
  # @return [Hash] the generation result, or an "error_fallback" result
  def generate_engagement_comments_with_fallback(post_payload:, image_description:, labels:, author_type:)
    started_at = monotonic_started_at
    generate_engagement_comments(
      post_payload: post_payload,
      image_description: image_description,
      labels: labels,
      author_type: author_type
    )
  rescue StandardError => e
    warning = {
      stage: "comment_generation",
      media_type: "post",
      error_class: e.class.name,
      error_message: normalize_error_message(e.message)
    }
    record_provider_warning!(
      warning: warning,
      started_at: started_at,
      category: "text_generation"
    )
    {
      model: ollama_model,
      raw: {},
      source: "fallback",
      status: "error_fallback",
      fallback_used: true,
      error_message: warning[:error_message],
      comment_suggestions: build_comment_suggestions(labels: labels, description: image_description)
    }
  end
  # Normalizes labels (string, lowercase, trimmed) and drops blanks and the
  # image/video analysis error markers.
  #
  # @param labels [Array, nil]
  # @return [Array<String>] unique labels in first-seen order
  def meaningful_visual_labels(labels)
    normalized = Array(labels).map { |label| label.to_s.downcase.strip }
    kept = normalized.reject do |label|
      label.blank? || label.start_with?("image_analysis_error:", "video_analysis_error:")
    end
    kept.uniq
  end
  # Number of face annotations in raw[:vision], read from the string key
  # first and the symbol key second; 0 on any error.
  #
  # @param raw [Hash]
  # @return [Integer]
  def extract_face_count_from_raw(raw)
    string_keyed = Array(raw.dig(:vision, "faceAnnotations")).length
    return string_keyed if string_keyed.positive?

    Array(raw.dig(:vision, :faceAnnotations)).length
  rescue StandardError
    0
  end
  # Human-readable explanation of why no visual description is available.
  #
  # When an analysis warning was recorded, its message (first 120 bytes) is
  # embedded. A multibyte character split by that cap is dropped so the
  # description stays validly encoded.
  #
  # @param raw [Hash] reads :vision_warning / :video_warning
  # @param media_type [#to_s]
  # @return [String]
  def unavailable_visual_description(raw:, media_type:)
    warning = raw[:vision_warning] || raw[:video_warning]
    if warning.is_a?(Hash)
      detail = warning[:error_message].to_s.presence || warning["error_message"].to_s.presence || "analysis_error"
      return "Visual analysis unavailable (#{detail.byteslice(0, 120).scrub("")})."
    end

    case media_type.to_s
    when "image"
      "Image analysis unavailable or returned no verifiable visual signals."
    when "video"
      "Video analysis unavailable or returned no verifiable visual signals."
    else
      "No image or video content available for visual description."
    end
  end
  # Result used when comment generation is skipped because no visual signals
  # were verified; the error message explains why visuals were unavailable.
  #
  # @return [Hash] same shape as a comment generation result, with no suggestions
  def skipped_comment_generation_for_missing_visuals(raw:, media_type:)
    explanation = unavailable_visual_description(raw: raw, media_type: media_type)
    {
      model: ollama_model,
      raw: {},
      source: "policy",
      status: "skipped_no_visual_signals",
      fallback_used: false,
      error_message: explanation,
      comment_suggestions: []
    }
  end
  # Runs a media analysis block and converts any StandardError into a warning
  # instead of raising.
  #
  # @param stage [#to_s] e.g. "image_analysis" or "video_analysis"
  # @param media_type [#to_s]
  # @yield the analysis call
  # @return [Array] [payload, nil] on success, [{}, warning_hash] on failure
  def safe_media_analysis(stage:, media_type:)
    began_at = monotonic_started_at
    [ yield, nil ]
  rescue StandardError => error
    stage_name = stage.to_s
    failure = {
      stage: stage_name,
      media_type: media_type.to_s,
      error_class: error.class.name,
      error_message: normalize_error_message(error.message)
    }
    tracking_category = stage_name == "video_analysis" ? "video_analysis" : "image_analysis"
    record_provider_warning!(warning: failure, started_at: began_at, category: tracking_category)
    [ {}, failure ]
  end
  # Formats a "<prefix>:<ErrorClass>" label, substituting "UnknownError" when
  # the error class is blank.
  def warning_label_for_error(error_class, prefix:)
    class_name = error_class.to_s
    class_name = "UnknownError" if class_name.blank?
    "#{prefix}:#{class_name}"
  end
  # Best-effort reporting of a provider warning: emits a structured log event
  # and records a failed API usage entry. Any StandardError raised while
  # reporting is swallowed and nil is returned.
  def record_provider_warning!(warning:, started_at:, category:)
    details = warning.to_h.merge(provider: key)
    Ops::StructuredLogger.warn(event: "ai.local_provider.fallback", payload: details)

    operation = warning[:stage].to_s.presence || "unknown_stage"
    failure = "#{warning[:error_class]}: #{warning[:error_message]}"
    Ai::ApiUsageTracker.track_failure(
      provider: "local_ai_stack",
      operation: operation,
      category: category,
      started_at: started_at,
      error: failure,
      metadata: details
    )
  rescue StandardError
    nil
  end
  # Reads the monotonic clock for duration tracking; if that raises, falls
  # back to the current time as a Float.
  def monotonic_started_at
    begin
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    rescue StandardError
      Time.current.to_f
    end
  end
  # Produces a compact error message for warnings and tracking.
  #
  # The message is stringified and stripped, replaced by "unknown error" when
  # blank, and truncated to at most 280 bytes.
  #
  # @param message [#to_s]
  # @return [String] valid text of at most 280 bytes
  def normalize_error_message(message)
    text = message.to_s.strip
    text = "unknown error" if text.blank?
    # byteslice may cut through a multibyte character; scrub drops the
    # leftover partial bytes so downstream consumers never see invalid text.
    text.byteslice(0, 280).scrub("")
  end
  # Memoized model name: the "ollama_model" setting value when present, then
  # the :ollama/:model credential, and finally "mistral:7b".
  def ollama_model
    return @ollama_model if @ollama_model

    chosen = setting&.config_value("ollama_model").to_s.presence
    chosen ||= Rails.application.credentials.dig(:ollama, :model).to_s.presence
    @ollama_model = chosen || "mistral:7b"
  end
  # Collects previously sent comments from
  # post_payload[:rules][:engagement_history][:prior_story_items].
  # Rows are converted with to_h when they support it; only Hash rows with a
  # non-blank sent_comment (symbol or string key) contribute. Any error while
  # reading the payload yields an empty list.
  def extract_historical_comments(post_payload)
    entries = Array(post_payload.dig(:rules, :engagement_history, :prior_story_items))
    entries.filter_map do |entry|
      entry = entry.to_h if entry.respond_to?(:to_h)
      next unless entry.is_a?(Hash)

      entry[:sent_comment].to_s.presence || entry["sent_comment"].to_s.presence
    end
  rescue StandardError
    []
  end
  # Returns post_payload[:rules][:historical_narrative_text] as a String, or
  # "" when it is missing or the payload cannot be read.
  def extract_historical_context(post_payload)
    narrative = post_payload.dig(:rules, :historical_narrative_text)
    narrative.to_s
  rescue StandardError
    ""
  end
  # Composes a short image description from vision output.
  #
  # Uses up to five labels and the description of the first entry under
  # "textAnnotations" (newlines flattened, truncated to 120 bytes). Falls back
  # to a generic sentence when neither is available.
  #
  # @param vision [Hash] vision payload, read with the string key "textAnnotations"
  # @param labels [Array<String>] labels, most relevant first
  # @return [String]
  def build_image_description_from_vision(vision, labels:)
    top_labels = labels.first(5)
    text = Array(vision["textAnnotations"]).first&.dig("description").to_s.strip
    parts = []
    parts << "Likely shows: #{top_labels.join(', ')}." if top_labels.any?
    if text.present?
      # byteslice may split a multibyte character; scrub removes the partial
      # bytes so the description is always valid text.
      visible = text.tr("\n", " ").byteslice(0, 120).scrub("")
      parts << "Visible text: #{visible}."
    end
    out = parts.join(" ").strip
    out.presence || "Image content appears visually clear but limited contextual details were detected."
  end
  # Describes a video from up to six labels, or returns a generic sentence
  # when there are none. The `video` argument is not consulted.
  def build_image_description_from_video(video, labels:)
    leading = labels.first(6)
    if leading.any?
      "Video frames indicate: #{leading.join(', ')}."
    else
      "Video content analyzed with local AI models."
    end
  end
  # Classifies tone: "enthusiastic" when the text contains "!", "casual" when
  # it contains one of a few informal interjections, otherwise "neutral".
  # Matching is case-sensitive.
  def infer_tone(text)
    if text.include?("!")
      "enthusiastic"
    elsif text.match?(/\b(hey|yo|lol|omg)\b/)
      "casual"
    else
      "neutral"
    end
  end
  # "formal" when the text contains please/thanks/regards as whole words
  # (case-sensitive), otherwise "casual".
  def infer_formality(text)
    return "formal" if text.match?(/\b(please|thanks|regards)\b/)

    "casual"
  end
  # "medium" when the text contains one of a few slang words as a whole word
  # (case-sensitive), otherwise "low".
  def infer_slang(text)
    return "medium" if text.match?(/\b(lol|lmao|bro|fam|idk|tbh)\b/)

    "low"
  end
  # Extracts a self-declared two-digit age from phrases like "I am 25" or
  # "i'm 31" (case-insensitive). Returns the Integer, or nil when absent.
  def extract_age(text)
    declared = text.match(/\b(i am|i'm)\s+(\d{2})\b/i)
    declared && declared[2].to_i
  end
  # Returns the first pronoun pair found in the text ("she/her", "he/him",
  # "they/them", checked in that order, case-insensitive, optional spaces
  # around the slash), or nil when none is present.
  def extract_pronouns(text)
    patterns = {
      "she/her" => /\bshe\s*\/\s*her\b/i,
      "he/him" => /\bhe\s*\/\s*him\b/i,
      "they/them" => /\bthey\s*\/\s*them\b/i
    }
    patterns.each do |pronouns, pattern|
      return pronouns if text.match?(pattern)
    end
    nil
  end
  # Heuristic demographic guesses derived from keyword matches.
  #
  # Age comes from an age declared in `bio` when present, otherwise from
  # keyword buckets in `text` (default 26). Gender and location are keyword
  # matches on `text`, defaulting to "unknown". The confidence values are the
  # fixed numbers below and the evidence string lists up to four labels.
  def infer_demographic_estimates(text:, bio:, labels:)
    declared_age = extract_age(bio)

    age = declared_age
    age ||=
      if text.match?(/\b(high school|class of 20\d{2})\b/)
        17
      elsif text.match?(/\b(student|college|university|campus)\b/)
        21
      elsif text.match?(/\b(mom|dad|parent)\b/)
        34
      else
        26
      end

    gender = "unknown"
    if text.match?(/\b(she\/her|she her|woman|girl|mrs|ms)\b/)
      gender = "female"
    elsif text.match?(/\b(he\/him|he him|man|boy|mr)\b/)
      gender = "male"
    elsif text.match?(/\b(they\/them|non[- ]?binary)\b/)
      gender = "non-binary"
    end

    place = text.match(/(?:📍|based in|from)\s+([a-z][a-z\s,.-]{2,40})/)
    location =
      if place
        place[1].to_s.split(/[|•]/).first.to_s.strip.titleize
      elsif text.match?(/\b(usa|us|united states)\b/)
        "United States"
      elsif text.match?(/\b(india|indian|hindi)\b/)
        "India"
      else
        "unknown"
      end

    label_list = Array(labels).first(4).join(', ')
    {
      age: age,
      # A declared age is trusted more than a keyword bucket.
      age_confidence: declared_age.nil? ? 0.3 : 0.75,
      gender: gender,
      gender_confidence: gender == "unknown" ? 0.2 : 0.35,
      location: location,
      location_confidence: location == "unknown" ? 0.2 : 0.35,
      evidence: "Estimated from bio/text pronouns, language hints, and local AI vision labels: #{label_list}"
    }
  end
  689. end
  690. end
  691. end

app/services/ai/runner.rb

0.0% lines covered

100.0% branches covered

289 relevant lines. 0 lines covered and 289 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "digest"
  2. require "uri"
  3. module Ai
  # Runs an AI analysis ("profile" or "post") for one account.
  #
  # Enabled providers are tried in order; every attempt is persisted as an
  # AiAnalysis row, and the first provider that succeeds wins. For posts a
  # previous analysis with the same media fingerprint can be reused instead of
  # calling a provider again.
  class Runner
    # @param account [Object] owner of the analyses; stored on every AiAnalysis
    #   row and its `id` is passed to the usage tracker context.
    def initialize(account:)
      @account = account
    end

    # Performs the analysis, falling back across providers.
    #
    # @param purpose [String] "profile" or "post"
    # @param analyzable [Object] record the AiAnalysis rows are attached to
    # @param payload [Hash] provider input; also scored for completeness
    # @param media [Hash, Array, nil] media descriptor passed to the provider
    # @param media_fingerprint [String, nil] precomputed fingerprint (posts only)
    # @param allow_cached [Boolean] whether a reusable post analysis may be returned
    # @param provider_options [Hash] forwarded to post analysis and stored in metadata
    # @return [Hash] `{ record:, result:, provider: }`, plus `cached: true` on a cache hit
    # @raise [RuntimeError] when no provider is usable or every provider fails
    def analyze!(
      purpose:,
      analyzable:,
      payload:,
      media: nil,
      media_fingerprint: nil,
      allow_cached: true,
      provider_options: {}
    )
      # Fingerprints are only computed for posts; profile runs keep nil.
      fingerprint = if purpose == "post"
        media_fingerprint.to_s.presence || compute_media_fingerprint(media)
      end

      if allow_cached && purpose == "post"
        cached = reusable_analysis_for(purpose: purpose, media_fingerprint: fingerprint)
        return build_cached_run(cached: cached, analyzable: analyzable, purpose: purpose, payload: payload, media_fingerprint: fingerprint) if cached
      end

      candidates = candidate_providers(purpose: purpose, media: media)
      failures = []

      candidates.each do |provider|
        # One row per attempt, created as "running" before the provider call.
        analysis = AiAnalysis.create!(
          instagram_account: @account,
          analyzable: analyzable,
          purpose: purpose,
          provider: provider.key,
          model: provider.preferred_model.presence,
          status: "running",
          started_at: Time.current,
          media_fingerprint: fingerprint,
          metadata: {
            provider_display_name: provider.display_name,
            provider_options: (provider_options.is_a?(Hash) ? provider_options : {})
          }
        )

        begin
          result = Ai::ApiUsageTracker.with_context(instagram_account_id: @account.id, workflow: "ai_runner", purpose: purpose) do
            case purpose
            when "profile"
              provider.analyze_profile!(profile_payload: payload, media: media)
            when "post"
              provider.analyze_post!(post_payload: payload, media: media, provider_options: provider_options)
            else
              raise "Unsupported AI purpose: #{purpose}"
            end
          end

          analysis.update!(
            model: result[:model].presence || analysis.model,
            status: "succeeded",
            finished_at: Time.current,
            prompt: JSON.generate(result[:prompt] || {}),
            response_text: result[:response_text].to_s,
            analysis: result[:analysis],
            input_completeness_score: input_completeness_score(payload),
            confidence_score: confidence_score(purpose: purpose, analysis: result[:analysis]),
            evidence_count: evidence_count(purpose: purpose, analysis: result[:analysis]),
            signals_detected_count: signals_detected_count(purpose: purpose, analysis: result[:analysis]),
            prompt_version: "v1",
            schema_version: schema_version_for(purpose: purpose),
            metadata: (analysis.metadata || {}).merge(
              cache_hit: false,
              raw: result[:response_raw]
            ),
            error_message: nil
          )

          sync_materialized_insights!(purpose: purpose, analysis_record: analysis, payload: payload, analysis_hash: result[:analysis])

          return { record: analysis, result: result, provider: provider }
        rescue StandardError => e
          # Any failure in this attempt (provider call, persistence or insight
          # sync) marks the row failed and moves on to the next provider.
          analysis.update!(status: "failed", finished_at: Time.current, error_message: e.message.to_s)
          failures << "#{provider.display_name}: #{e.message}"
        end
      end

      raise "All enabled AI providers failed. #{failures.join(' | ')}"
    end

    private

    # Builds the ordered list of providers that are enabled, under their daily
    # limit, available, and able to handle this purpose/media.
    # Raises when nothing is enabled or nothing qualifies.
    def candidate_providers(purpose:, media:)
      settings = Ai::ProviderRegistry.enabled_settings.to_a
      raise "No AI providers are enabled. Configure one in Admin > AI Providers." if settings.empty?

      settings = filter_settings_by_daily_limit(settings: settings, purpose: purpose)

      candidates = settings.filter_map do |setting|
        provider = Ai::ProviderRegistry.build_provider(setting.provider, setting: setting)
        next nil unless provider.available?
        next nil unless supports_purpose?(provider, purpose: purpose, media: media)

        provider
      end

      raise "No enabled AI provider supports this analysis type." if candidates.empty?

      candidates
    end

    # Finds an earlier analysis that can be reused for the same fingerprint.
    # Returns nil without a fingerprint, without a match, or when the match is
    # a post analysis in the legacy comment-generation shape.
    def reusable_analysis_for(purpose:, media_fingerprint:)
      return nil if media_fingerprint.blank?

      candidate = AiAnalysis.reusable_for(purpose: purpose, media_fingerprint: media_fingerprint).first
      return nil unless candidate
      return nil if purpose == "post" && legacy_post_comment_generation_payload?(candidate.analysis)

      candidate
    end

    # True when a stored post analysis contains comment suggestions but shows
    # one of the markers checked below (error fallback, missing status,
    # tag-rules-only evidence, or no visual_signal_count key).
    def legacy_post_comment_generation_payload?(analysis_hash)
      return false unless analysis_hash.is_a?(Hash)
      return false unless analysis_hash.key?("comment_suggestions")
      return true if analysis_hash["comment_generation_status"].to_s == "error_fallback"
      return true if analysis_hash["comment_generation_status"].to_s.blank?
      return true if analysis_hash["evidence"].to_s.include?("No labels detected; used tag rules only")
      return true unless analysis_hash.key?("visual_signal_count")

      false
    end

    # Records a new, already-succeeded AiAnalysis that copies the cached
    # analysis, syncs insights from it, and returns the same shape as a live
    # run with `cached: true`.
    def build_cached_run(cached:, analyzable:, purpose:, payload:, media_fingerprint:)
      provider = provider_for_key(cached.provider)
      now = Time.current

      record = AiAnalysis.create!(
        instagram_account: @account,
        analyzable: analyzable,
        purpose: purpose,
        provider: cached.provider,
        model: cached.model,
        status: "succeeded",
        started_at: now,
        finished_at: now,
        prompt: cached.prompt,
        response_text: cached.response_text,
        analysis: cached.analysis,
        # Completeness is recomputed from the current payload, not copied.
        input_completeness_score: input_completeness_score(payload),
        confidence_score: cached.confidence_score,
        evidence_count: cached.evidence_count,
        signals_detected_count: cached.signals_detected_count,
        prompt_version: cached.prompt_version,
        schema_version: cached.schema_version,
        media_fingerprint: media_fingerprint,
        cache_hit: true,
        cached_from_ai_analysis_id: cached.id,
        metadata: (cached.metadata || {}).merge(
          cache_hit: true,
          reused_from_ai_analysis_id: cached.id,
          reused_at: now.iso8601
        )
      )

      sync_materialized_insights!(purpose: purpose, analysis_record: record, payload: payload, analysis_hash: cached.analysis)

      {
        record: record,
        result: {
          model: cached.model,
          prompt: parsed_json_or_hash(cached.prompt),
          response_text: cached.response_text.to_s,
          response_raw: cached.metadata,
          analysis: cached.analysis
        },
        provider: provider,
        cached: true
      }
    end

    # Returns a Hash as-is, parses anything else as JSON, and yields {} when
    # parsing fails.
    def parsed_json_or_hash(value)
      return value if value.is_a?(Hash)

      JSON.parse(value.to_s)
    rescue StandardError
      {}
    end

    # Builds the provider for a key; if that fails, returns a minimal stand-in
    # exposing only `key` and `display_name`.
    def provider_for_key(provider_key)
      Ai::ProviderRegistry.build_provider(provider_key)
    rescue StandardError
      Struct.new(:key, :display_name).new(provider_key.to_s, provider_key.to_s.humanize)
    end

    # Drops settings whose provider already reached its positive "daily_limit"
    # of succeeded analyses today, then orders the rest by priority,
    # utilization and provider name.
    def filter_settings_by_daily_limit(settings:, purpose:)
      todays_counts = AiAnalysis.where(purpose: purpose, status: "succeeded")
        .where(created_at: Time.current.all_day)
        .group(:provider)
        .count

      with_load = settings.map do |setting|
        limit = integer_or_nil(setting.config_value("daily_limit"))
        used = todays_counts[setting.provider].to_i
        # A missing or non-positive limit means no cap, so utilization is 0.
        utilization = limit.to_i.positive? ? (used.to_f / limit.to_f) : 0.0
        [ setting, limit, used, utilization ]
      end

      available = with_load.reject { |_setting, limit, used, _utilization| limit.to_i.positive? && used >= limit }
      sorted = available.sort_by { |setting, _limit, _used, utilization| [ setting.priority.to_i, utilization, setting.provider ] }
      sorted.map(&:first)
    end

    # Strict integer conversion; nil for blank or non-integer input.
    def integer_or_nil(value)
      return nil if value.blank?

      Integer(value)
    rescue StandardError
      nil
    end

    # SHA-256 fingerprint of the first media item, taken from (in order) its
    # bytes, its image data URL, or its normalized URL. Symbol and string keys
    # are both accepted. Returns nil when none is available.
    def compute_media_fingerprint(media)
      item = media.is_a?(Array) ? media.first : media
      return nil unless item.is_a?(Hash)

      bytes = item[:bytes] || item["bytes"]
      return Digest::SHA256.hexdigest(bytes) if bytes.present?

      data_url = item[:image_data_url] || item["image_data_url"]
      return Digest::SHA256.hexdigest(data_url.to_s) if data_url.present?

      url = item[:url] || item["url"]
      normalized = normalize_url(url)
      return Digest::SHA256.hexdigest(normalized) if normalized.present?

      nil
    end

    # Reduces an HTTP(S) URL to scheme, host and path (port, query and
    # fragment are dropped). Other or unparsable values are returned stripped
    # but otherwise unchanged; blank input gives nil.
    def normalize_url(raw)
      value = raw.to_s.strip
      return nil if value.blank?

      uri = URI.parse(value)
      # URI::HTTPS is a subclass of URI::HTTP, so one check covers both.
      return value unless uri.is_a?(URI::HTTP)

      "#{uri.scheme}://#{uri.host}#{uri.path}"
    rescue StandardError
      value
    end

    # Whether the provider can handle this purpose. For posts the media type
    # decides between video and image support. The media item is resolved the
    # same way as in compute_media_fingerprint (first element of an Array,
    # symbol or string keys) so a video is recognised in every shape.
    def supports_purpose?(provider, purpose:, media:)
      return provider.supports_profile? if purpose == "profile"
      return false unless purpose == "post"

      item = media.is_a?(Array) ? media.first : media
      type = item.is_a?(Hash) ? (item[:type] || item["type"]).to_s : ""
      return provider.supports_post_video? if type == "video"

      provider.supports_post_image?
    end

    # Forwards a Hash analysis to the matching Ai::InsightSync entry point;
    # does nothing for non-Hash analyses or other purposes.
    def sync_materialized_insights!(purpose:, analysis_record:, payload:, analysis_hash:)
      return unless analysis_hash.is_a?(Hash)

      case purpose
      when "profile"
        Ai::InsightSync.sync_profile!(analysis_record: analysis_record, payload: payload, analysis_hash: analysis_hash)
      when "post"
        Ai::InsightSync.sync_post!(analysis_record: analysis_record, analysis_hash: analysis_hash)
      end
    end

    # Schema identifier stored with each analysis.
    def schema_version_for(purpose:)
      case purpose
      when "profile" then "profile_insights_v2"
      when "post" then "post_insights_v2"
      else "unknown"
      end
    end

    # Share of payload leaf values that are present, rounded to 4 decimals.
    # Returns nil when the payload has no leaves at all.
    def input_completeness_score(payload)
      total = 0
      present = 0

      walk_payload(payload) do |value|
        total += 1
        present += 1 if value.present?
      end

      return nil if total <= 0

      (present.to_f / total).round(4)
    end

    # Yields every leaf of a nested Hash/Array structure. An empty Array is
    # reported as a single nil leaf; an empty Hash yields nothing.
    def walk_payload(value, &block)
      case value
      when Hash
        value.each_value { |v| walk_payload(v, &block) }
      when Array
        if value.empty?
          block.call(nil)
        else
          value.each { |v| walk_payload(v, &block) }
        end
      else
        block.call(value)
      end
    end

    # Confidence stored with the analysis.
    #
    # Posts use the provider's own "confidence" clamped to 0.0..1.0, or nil
    # when it is missing or not numeric. Other purposes get a heuristic based
    # on how many languages and likes were found.
    def confidence_score(purpose:, analysis:)
      return nil unless analysis.is_a?(Hash)

      if purpose == "post"
        # Parse without a rescue modifier: `return ... rescue nil` would swallow
        # the error and fall through to the profile heuristic below.
        score = Float(analysis["confidence"], exception: false)
        return nil if score.nil? || score.nan?

        return score.clamp(0.0, 1.0)
      end

      langs = Array(analysis["languages"]).size
      likes = Array(analysis["likes"]).size
      ([(langs * 0.1) + (likes * 0.05), 1.0].min).round(4)
    end

    # Number of evidence items: for posts, topics plus one for non-blank
    # evidence text; otherwise languages, likes, dislikes plus one for
    # non-blank confidence notes.
    def evidence_count(purpose:, analysis:)
      return 0 unless analysis.is_a?(Hash)

      if purpose == "post"
        count = 0
        count += 1 if analysis["evidence"].to_s.present?
        count += Array(analysis["topics"]).size
        return count
      end

      count = 0
      count += Array(analysis["languages"]).size
      count += Array(analysis["likes"]).size
      count += Array(analysis["dislikes"]).size
      count += 1 if analysis["confidence_notes"].to_s.present?
      count
    end

    # Number of detected signals: for posts, topics plus suggested actions;
    # otherwise languages, likes and present self-declared values.
    def signals_detected_count(purpose:, analysis:)
      return 0 unless analysis.is_a?(Hash)

      if purpose == "post"
        return Array(analysis["topics"]).size + Array(analysis["suggested_actions"]).size
      end

      self_declared = analysis["self_declared"].is_a?(Hash) ? analysis["self_declared"] : {}
      declared_count = self_declared.values.count(&:present?)
      Array(analysis["languages"]).size + Array(analysis["likes"]).size + declared_count
    end
  end
  289. end

app/services/ai/verified_story_insight_builder.rb

0.0% lines covered

100.0% branches covered

610 relevant lines. 0 lines covered and 610 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. module Ai
  2. class VerifiedStoryInsightBuilder
  # Confidence floors applied while normalizing raw detections: OCR blocks and
  # object detections that report a positive confidence below these values
  # are discarded (a confidence of 0 is kept).
  3. MIN_OCR_BLOCK_CONFIDENCE = 0.35
  4. MIN_OBJECT_CONFIDENCE = 0.30
  # Stories scoring below this are classified "insufficient_evidence" and skipped.
  5. MIN_SIGNAL_SCORE_FOR_COMMENT = 3
  # Identity-confidence threshold used by the ownership and generation-policy checks.
  6. MIN_OWNER_ALIGNMENT_CONFIDENCE = 0.58
  # Captures the handle after an "@": 2-30 letters, digits, dots or underscores.
  7. OCR_USERNAME_REGEX = /@([a-zA-Z0-9._]{2,30})/
  # Captures any standalone 3-30 character token made of handle characters.
  8. BARE_USERNAME_REGEX = /\b([a-zA-Z0-9._]{3,30})\b/
  # Case-insensitive phrases for reshared content.
  # NOTE(review): presumably consumed by detect_reshare_indicators, which is not shown here — confirm.
  9. RESHARE_PATTERNS = [
  10. /\brepost\b/i,
  11. /\breshare\b/i,
  12. /\bshared\s+from\b/i,
  13. /\bvia\s+@?[a-z0-9._]+\b/i,
  14. /\bcredit(?:s)?\b/i,
  15. /\boriginal\s+by\b/i
  16. ].freeze
  # Case-insensitive phrases for meme-style content.
  # NOTE(review): presumably consumed by detect_meme_markers, which is not shown here — confirm.
  17. MEME_PATTERNS = [
  18. /\bmemes?\b/i,
  19. /\bi know nobody gave you\b/i,
  20. /\bdon'?t worry\b/i,
  21. /\bwhen you\b/i
  22. ].freeze
  # NOTE(review): looks like Instagram URL path segments that are not usernames — confirm at the use site.
  23. RESERVED_IG_SEGMENTS = %w[stories p reel reels tv explore accounts direct v].freeze
  # Stores the profile and keeps the story intelligence and metadata only when
  # they are Hashes; anything else is replaced by an empty Hash.
  def initialize(profile:, local_story_intelligence:, metadata:)
    @profile = profile

    @raw = {}
    @raw = local_story_intelligence if local_story_intelligence.is_a?(Hash)

    @metadata = {}
    @metadata = metadata if metadata.is_a?(Hash)
  end
  # Runs the three stages in order (verified facts, ownership classification,
  # generation policy) and returns them together with a validation timestamp.
  def build
    facts = build_verified_story_facts
    ownership = classify_ownership(verified_story_facts: facts)
    policy = build_generation_policy(verified_story_facts: facts, ownership_classification: ownership)

    {
      verified_story_facts: facts,
      ownership_classification: ownership,
      generation_policy: policy,
      validated_at: Time.current.iso8601
    }
  end
  43. private
  # Normalizes the raw story intelligence (@raw, read with symbol or string
  # keys) and @metadata into one facts Hash: OCR, transcript, objects, scenes,
  # hashtags, mentions, topics, faces, detected usernames, source profile
  # references, reshare/meme markers, identity verification and a signal score.
  # List values in the returned Hash are capped with first(n).
  44. def build_verified_story_facts
  45. ocr_blocks = normalize_ocr_blocks(@raw[:ocr_blocks] || @raw["ocr_blocks"])
  46. object_detections = normalize_object_detections(@raw[:object_detections] || @raw["object_detections"])
  47. scenes = normalize_scenes(@raw[:scenes] || @raw["scenes"])
  48. ocr_text = normalize_text(@raw[:ocr_text] || @raw["ocr_text"], max: 800)
  # When OCR blocks survived normalization, their joined text takes precedence
  # over the raw ocr_text field.
  49. if ocr_blocks.any?
  50. ocr_text = ocr_blocks.map { |row| row[:text] }.join("\n").presence || ocr_text
  51. end
  52. transcript = normalize_text(@raw[:transcript] || @raw["transcript"], max: 800)
  53. mentions = normalize_handle_array(@raw[:mentions] || @raw["mentions"], prefix: "@")
  54. hashtags = normalize_handle_array(@raw[:hashtags] || @raw["hashtags"], prefix: "#")
  55. objects = normalize_objects(@raw[:objects] || @raw["objects"], object_detections: object_detections)
  56. topics = normalize_topics(@raw[:topics] || @raw["topics"], objects: objects, hashtags: hashtags)
  57. faces = normalize_faces
  58. detected_usernames = detect_usernames(
  59. mentions: mentions,
  60. profile_handles: @raw[:profile_handles] || @raw["profile_handles"],
  61. ocr_text: ocr_text,
  62. transcript: transcript,
  63. metadata: @metadata
  64. )
  65. source_profile_references = extract_source_profile_references(metadata: @metadata)
  66. source_profile_ids = extract_source_profile_ids(metadata: @metadata)
  67. reshare_hits = detect_reshare_indicators(
  68. ocr_text: ocr_text,
  69. transcript: transcript,
  70. metadata: @metadata
  71. )
  72. meme_markers = detect_meme_markers(
  73. ocr_text: ocr_text,
  74. transcript: transcript,
  75. metadata: @metadata
  76. )
  77. identity_verification = build_identity_verification(
  78. faces: faces,
  79. topics: topics,
  80. detected_usernames: detected_usernames,
  81. source_profile_references: source_profile_references
  82. )
  83. signal_score = score_verified_signals(
  84. ocr_text: ocr_text,
  85. transcript: transcript,
  86. objects: objects,
  87. object_detections: object_detections,
  88. scenes: scenes,
  89. hashtags: hashtags,
  90. mentions: mentions,
  91. faces: faces
  92. )
  93. {
  94. source: @raw[:source].to_s.presence || @raw["source"].to_s.presence || "unknown",
  95. reason: @raw[:reason].to_s.presence || @raw["reason"].to_s.presence,
  96. ocr_text: ocr_text,
  97. ocr_blocks: ocr_blocks.first(30),
  98. transcript: transcript,
  99. object_detections: object_detections.first(30),
  100. objects: objects.first(20),
  101. scenes: scenes.first(20),
  102. hashtags: hashtags.first(20),
  103. mentions: mentions.first(20),
  104. profile_handles: Array(@raw[:profile_handles] || @raw["profile_handles"]).map(&:to_s).first(20),
  105. topics: topics.first(20),
  106. detected_usernames: detected_usernames.first(20),
  107. source_profile_references: source_profile_references.first(20),
  108. source_profile_ids: source_profile_ids.first(20),
  109. reshare_indicators: reshare_hits.first(12),
  110. meme_markers: meme_markers.first(12),
  # Only the string key "media_type" is read from the metadata here.
  111. media_type: @metadata["media_type"].to_s.presence,
  112. faces: faces,
  113. face_count: faces[:total_count].to_i,
  114. people: faces[:people].first(12),
  115. identity_verification: identity_verification,
  116. signal_score: signal_score
  117. }
  118. end
  # Decides whether the story looks like the profile's own content and whether
  # commenting is allowed.
  #
  # Starts from label "owned_by_profile" / decision "allow_comment" and applies
  # the first matching rule of the elsif chain below; every rule except the
  # last switches the decision to "skip_comment". Reason codes are collected
  # independently of which rule fires.
  #
  # Returns a Hash with the label, decision, confidence, reason codes, share
  # status, username/source evidence, face counts and a summary.
  119. def classify_ownership(verified_story_facts:)
  120. profile_username = normalize_username(@profile&.username)
  121. usernames = Array(verified_story_facts[:detected_usernames]).map { |value| normalize_username(value) }.reject(&:blank?).uniq
  # "External" means any detected username or source reference other than the profile's own.
  122. external_usernames = usernames.reject { |value| value == profile_username }
  123. profile_username_detected = profile_username.present? && usernames.include?(profile_username)
  124. source_profile_references = Array(verified_story_facts[:source_profile_references]).map { |value| normalize_username(value) }.reject(&:blank?).uniq
  125. external_source_refs = source_profile_references.reject { |value| value == profile_username }
  126. face_data = verified_story_facts[:faces].is_a?(Hash) ? verified_story_facts[:faces] : {}
  127. primary_faces = face_data[:primary_user_count].to_i
  128. secondary_faces = face_data[:secondary_person_count].to_i
  129. identity_verification = verified_story_facts[:identity_verification].is_a?(Hash) ? verified_story_facts[:identity_verification] : {}
  130. owner_likelihood = identity_verification[:owner_likelihood].to_s
  131. identity_confidence = identity_verification[:confidence].to_f
  # Secondary people are present but the primary user is not among the faces.
  132. non_primary_faces_without_primary = secondary_faces.positive? && primary_faces <= 0
  133. reshare_hits = Array(verified_story_facts[:reshare_indicators]).map(&:to_s)
  134. meme_markers = Array(verified_story_facts[:meme_markers]).map(&:to_s)
  135. third_party_link = third_party_profile_link_detected?(profile_username: profile_username, metadata: @metadata)
  136. share_status = infer_share_status(
  137. profile_username_detected: profile_username_detected,
  138. external_usernames: external_usernames,
  139. external_source_refs: external_source_refs,
  140. reshare_hits: reshare_hits,
  141. meme_markers: meme_markers
  142. )
  # Reason codes record every observation, not only the rule that decides the label.
  143. reason_codes = []
  144. reason_codes << "external_usernames_detected" if external_usernames.any?
  145. reason_codes << "external_source_profile_reference_detected" if external_source_refs.any?
  146. reason_codes << "profile_username_not_detected" if profile_username.present? && !profile_username_detected
  147. reason_codes << "non_primary_faces_detected" if non_primary_faces_without_primary
  148. reason_codes << "reshare_indicators_detected" if reshare_hits.any?
  149. reason_codes << "meme_markers_detected" if meme_markers.any?
  150. reason_codes << "third_party_profile_link_detected" if third_party_link
  151. reason_codes << "identity_likelihood_low" if owner_likelihood == "low"
  152. reason_codes << "identity_likelihood_high" if owner_likelihood == "high"
  153. reason_codes << "identity_confidence_low" if identity_confidence.positive? && identity_confidence < 0.45
  154. reason_codes << "share_status_#{share_status}" if share_status != "unknown"
  155. signal_score = verified_story_facts[:signal_score].to_i
  156. label = "owned_by_profile"
  157. decision = "allow_comment"
  # Rules are order-sensitive: the first matching branch wins.
  158. if signal_score < MIN_SIGNAL_SCORE_FOR_COMMENT
  159. label = "insufficient_evidence"
  160. decision = "skip_comment"
  161. reason_codes << "insufficient_verified_signals"
  162. elsif meme_markers.any? && external_usernames.any?
  163. label = "meme_reshare"
  164. decision = "skip_comment"
  165. elsif meme_markers.any? && !profile_username_detected
  166. label = "meme_reshare"
  167. decision = "skip_comment"
  168. elsif share_status == "reshared" && external_usernames.any?
  169. label = "reshare"
  170. decision = "skip_comment"
  171. elsif reshare_hits.any? || third_party_link
  172. label = "reshare"
  173. decision = "skip_comment"
  174. elsif external_source_refs.any? && !profile_username_detected
  175. label = "third_party_content"
  176. decision = "skip_comment"
  177. elsif external_usernames.any? && !profile_username_detected && non_primary_faces_without_primary
  178. label = "third_party_content"
  179. decision = "skip_comment"
  180. elsif external_usernames.any? && !profile_username_detected && signal_score <= 3
  181. label = "third_party_content"
  182. decision = "skip_comment"
  183. elsif non_primary_faces_without_primary && signal_score <= 2
  184. label = "unrelated_post"
  185. decision = "skip_comment"
  186. elsif owner_likelihood == "low" && (external_usernames.any? || external_source_refs.any? || non_primary_faces_without_primary)
  187. label = "third_party_content"
  188. decision = "skip_comment"
  # This last branch re-assigns the default label and decision, so it leaves the outcome unchanged.
  189. elsif owner_likelihood == "high" && identity_confidence >= MIN_OWNER_ALIGNMENT_CONFIDENCE && share_status == "unknown" && reshare_hits.empty? && meme_markers.empty?
  190. label = "owned_by_profile"
  191. decision = "allow_comment"
  192. end
  193. {
  194. label: label,
  195. decision: decision,
  # ownership_confidence receives the reason codes before de-duplication; the
  # returned :reason_codes list is de-duplicated.
  196. confidence: ownership_confidence(
  197. label: label,
  198. reason_codes: reason_codes,
  199. signal_score: signal_score
  200. ),
  201. reason_codes: reason_codes.uniq,
  202. profile_username_detected: profile_username_detected,
  203. share_status: share_status,
  204. source_profile_references: source_profile_references.first(10),
  205. source_profile_ids: Array(verified_story_facts[:source_profile_ids]).map(&:to_s).first(10),
  206. detected_external_usernames: external_usernames.first(10),
  207. reshare_indicators: reshare_hits.first(10),
  208. meme_markers: meme_markers.first(10),
  209. identity_verification: identity_verification,
  210. face_evidence: {
  211. primary_user_count: primary_faces,
  212. secondary_person_count: secondary_faces,
  213. total_count: face_data[:total_count].to_i
  214. },
  215. summary: ownership_summary(
  216. label: label,
  217. external_usernames: external_usernames,
  218. external_source_refs: external_source_refs,
  219. reshare_hits: reshare_hits,
  220. meme_markers: meme_markers,
  221. primary_faces: primary_faces,
  222. secondary_faces: secondary_faces,
  223. signal_score: signal_score
  224. )
  225. }
  226. end
  # Turns the ownership classification into the final generation policy.
  #
  # Commenting is allowed only when the classification says "allow_comment",
  # and is additionally blocked when an "owned_by_profile" label rests on a
  # "low" owner likelihood with confidence below MIN_OWNER_ALIGNMENT_CONFIDENCE.
  # When blocked, the reason code is the first ownership reason code (or
  # "policy_blocked") and the reason is the ownership summary (or a generic
  # sentence).
  def build_generation_policy(verified_story_facts:, ownership_classification:)
    permitted = ownership_classification[:decision].to_s == "allow_comment"

    identity = verified_story_facts[:identity_verification]
    identity = {} unless identity.is_a?(Hash)

    if permitted
      weak_owner_match =
        ownership_classification[:label].to_s == "owned_by_profile" &&
        identity[:owner_likelihood].to_s == "low" &&
        identity[:confidence].to_f < MIN_OWNER_ALIGNMENT_CONFIDENCE
      permitted = false if weak_owner_match
    end

    if permitted
      code = "verified_context_available"
      explanation = "Verified context is sufficient for grounded generation."
    else
      code = ownership_classification[:reason_codes].first.to_s.presence || "policy_blocked"
      explanation = ownership_classification[:summary].to_s.presence || "Insufficient or irrelevant verified context for safe comment generation."
    end

    {
      allow_comment: permitted,
      reason_code: code,
      reason: explanation,
      classification: ownership_classification[:label].to_s,
      signal_score: verified_story_facts[:signal_score].to_i,
      minimum_signal_score: MIN_SIGNAL_SCORE_FOR_COMMENT,
      owner_likelihood: identity[:owner_likelihood].to_s,
      identity_confidence: identity[:confidence].to_f.round(2),
      source: "verified_story_insight_builder"
    }
  end
  # Keeps Hash rows with non-blank text (normalized to at most 180 characters
  # by normalize_text). Rows reporting a positive confidence below
  # MIN_OCR_BLOCK_CONFIDENCE are dropped. Each kept row carries text,
  # confidence, source (default "ocr") and, when present, timestamp.
  def normalize_ocr_blocks(value)
    Array(value).each_with_object([]) do |block, kept|
      next unless block.is_a?(Hash)

      text = normalize_text(block[:text] || block["text"], max: 180)
      next if text.blank?

      confidence = (block[:confidence] || block["confidence"]).to_f
      too_uncertain = confidence.positive? && confidence < MIN_OCR_BLOCK_CONFIDENCE
      next if too_uncertain

      entry = {
        text: text,
        confidence: confidence,
        source: (block[:source] || block["source"]).to_s.presence || "ocr",
        timestamp: block[:timestamp] || block["timestamp"]
      }
      kept << entry.compact
    end
  end
  # Keeps Hash rows with a non-blank label (taken from label or description,
  # lowercased). Confidence is read from confidence, score or max_confidence;
  # rows with a positive value below MIN_OBJECT_CONFIDENCE are dropped.
  # Timestamps are limited to 20 floats and rows are de-duplicated by label
  # and timestamps.
  def normalize_object_detections(value)
    detections = Array(value).each_with_object([]) do |detection, kept|
      next unless detection.is_a?(Hash)

      raw_label = detection[:label] || detection["label"] || detection[:description] || detection["description"]
      label = normalize_text(raw_label, max: 80)&.downcase
      next if label.blank?

      raw_confidence = detection[:confidence] || detection["confidence"] ||
        detection[:score] || detection["score"] ||
        detection[:max_confidence] || detection["max_confidence"]
      confidence = raw_confidence.to_f
      next if confidence.positive? && confidence < MIN_OBJECT_CONFIDENCE

      timestamps = Array(detection[:timestamps] || detection["timestamps"]).map(&:to_f).first(20)
      kept << { label: label, confidence: confidence, timestamps: timestamps }
    end

    detections.uniq { |entry| entry.values_at(:label, :timestamps) }
  end
  # Keeps Hash rows with a non-blank scene type (lowercased), carrying
  # timestamp and correlation along when they are present.
  def normalize_scenes(value)
    Array(value).each_with_object([]) do |scene, kept|
      next unless scene.is_a?(Hash)

      kind = normalize_text(scene[:type] || scene["type"], max: 60)
      next if kind.blank?

      entry = {
        type: kind.downcase,
        timestamp: scene[:timestamp] || scene["timestamp"],
        correlation: scene[:correlation] || scene["correlation"]
      }
      kept << entry.compact
    end
  end
  # Merges declared object names with the labels of normalized detections into
  # one lowercased, de-duplicated list of at most 40 entries.
  def normalize_objects(raw_objects, object_detections:)
    declared = Array(raw_objects).map { |item| normalize_text(item, max: 80) }.compact
    detected = Array(object_detections).map { |detection| detection[:label].to_s.downcase }

    combined = declared.map(&:downcase) + detected.reject(&:blank?)
    combined.uniq.first(40)
  end
  # Combines declared topics, object names and hashtags (without the leading
  # "#") into one de-duplicated list of at most 40 non-blank entries.
  def normalize_topics(raw_topics, objects:, hashtags:)
    declared = Array(raw_topics).map { |item| normalize_text(item, max: 80) }.compact
    tag_topics = Array(hashtags).map { |tag| tag.to_s.delete_prefix("#").downcase }

    merged = declared.map(&:downcase) + objects + tag_topics
    merged.reject(&:blank?).uniq.first(40)
  end
  # Summarizes the people detected in the story from @raw (symbol or string
  # keys). Rows must be Hashes with a non-blank role. The total is the larger
  # of the reported face_count and the number of kept rows; faces that are
  # neither "primary_user" nor "secondary_person" count as unknown.
  def normalize_faces
    people = Array(@raw[:people] || @raw["people"]).each_with_object([]) do |person, kept|
      next unless person.is_a?(Hash)

      role = (person[:role] || person["role"]).to_s
      next if role.blank?

      age = (person[:age] || person["age"]).to_f
      entry = {
        person_id: person[:person_id] || person["person_id"],
        role: role,
        similarity: (person[:similarity] || person["similarity"]).to_f,
        label: (person[:label] || person["label"]).to_s.presence,
        # Non-positive ages are treated as missing.
        age: age.positive? ? age.round(1) : nil,
        age_range: (person[:age_range] || person["age_range"]).to_s.presence,
        gender: (person[:gender] || person["gender"]).to_s.presence,
        gender_score: (person[:gender_score] || person["gender_score"]).to_f
      }
      kept << entry.compact
    end

    reported = (@raw[:face_count] || @raw["face_count"]).to_i
    total = [reported, people.size].max

    roles = people.map { |entry| entry[:role].to_s }
    primary = roles.count("primary_user")
    secondary = roles.count("secondary_person")

    {
      total_count: total,
      primary_user_count: primary,
      secondary_person_count: secondary,
      unknown_count: [total - (primary + secondary), 0].max,
      people: people
    }
  end
  # Scores how likely the analysed media belongs to the profile owner.
  #
  # Starts from a base confidence of 0.32 and applies fixed adjustments for
  # face roles, a match against the historically stored primary person,
  # username references, bio-topic overlap and gender/age consistency. The
  # result is clamped to 0.05..0.98 and bucketed into high/medium/low.
  #
  # @param faces [Hash] face summary; only faces[:people] is read
  # @param topics [Array] topics detected in the media
  # @param detected_usernames [Array] usernames found in the media
  # @param source_profile_references [Array] usernames taken from source links
  # @return [Hash] likelihood, confidence, the individual signals and reason codes
  def build_identity_verification(faces:, topics:, detected_usernames:, source_profile_references:)
    profile_username = normalize_username(@profile&.username)
    people = faces.is_a?(Hash) ? Array(faces[:people]) : []
    person_ids = people.map { |row| row[:person_id] }.compact

    people_index = {}
    if @profile&.respond_to?(:instagram_story_people)
      people_index = @profile.instagram_story_people.where(id: person_ids).index_by(&:id)
    end

    behavior_profile = nil
    if @profile&.respond_to?(:instagram_profile_behavior_profile)
      behavior_profile = @profile.instagram_profile_behavior_profile
    end
    summary = behavior_profile&.behavioral_summary
    summary = {} unless summary.is_a?(Hash)
    identity_profile = summary["face_identity_profile"]
    identity_profile = {} unless identity_profile.is_a?(Hash)
    historical_primary_person_id = identity_profile["person_id"] || identity_profile[:person_id]

    primary_person_present = people.any? { |row| row[:role].to_s == "primary_user" }
    # Ids are compared as strings because the stored id and the row id may differ in type.
    recurring_primary_person = historical_primary_person_id.present? &&
      people.any? { |row| row[:person_id].to_s == historical_primary_person_id.to_s }

    profile_topics = extract_profile_bio_topics
    media_topics = Array(topics).map { |value| value.to_s.downcase.strip }.reject(&:blank?)
    topic_overlap = (profile_topics & media_topics).first(8)

    normalized_usernames = Array(detected_usernames).map { |value| normalize_username(value) }.reject(&:blank?)
    normalized_refs = Array(source_profile_references).map { |value| normalize_username(value) }.reject(&:blank?)
    all_references = normalized_usernames + normalized_refs
    profile_username_match = profile_username.present? && all_references.include?(profile_username)
    external_reference_detected = all_references.uniq.any? { |value| value != profile_username }

    consistency_args = { people: people, people_index: people_index, primary_person_id: historical_primary_person_id }
    gender_consistency, observed_gender = face_gender_consistency(**consistency_args)
    age_consistency, observed_age_range = face_age_consistency(**consistency_args)

    # Adjustments are applied strictly in this order so the floating-point sum is stable.
    confidence = 0.32
    [
      [0.25, primary_person_present],
      [0.22, recurring_primary_person],
      [0.12, profile_username_match],
      [0.09, topic_overlap.any?],
      [0.06, gender_consistency == "consistent"],
      [0.06, age_consistency == "consistent"],
      [-0.18, !primary_person_present && people.any?],
      [-0.12, external_reference_detected && !profile_username_match]
    ].each { |delta, applies| confidence += delta if applies }
    confidence = confidence.clamp(0.05, 0.98).round(2)

    likelihood_floor = [[0.68, "high"], [0.45, "medium"]].find { |floor, _label| confidence >= floor }
    owner_likelihood = likelihood_floor ? likelihood_floor.last : "low"

    reason_codes = [
      ["primary_face_role_detected", primary_person_present],
      ["historical_primary_person_match", recurring_primary_person],
      ["profile_username_reference_detected", profile_username_match],
      ["bio_topic_overlap_detected", topic_overlap.any?],
      ["external_user_reference_detected", external_reference_detected],
      ["gender_consistency_#{gender_consistency}", gender_consistency != "unknown"],
      ["age_consistency_#{age_consistency}", age_consistency != "unknown"]
    ].filter_map { |code, applies| code if applies }

    {
      owner_likelihood: owner_likelihood,
      confidence: confidence,
      primary_person_present: primary_person_present,
      recurring_primary_person: recurring_primary_person,
      profile_username_match: profile_username_match,
      external_reference_detected: external_reference_detected,
      bio_topic_overlap: topic_overlap,
      observed_gender: observed_gender,
      observed_age_range: observed_age_range,
      gender_consistency: gender_consistency,
      age_consistency: age_consistency,
      reason_codes: reason_codes.uniq.first(12)
    }
  end
  # Compares the genders observed on the current faces with the gender cue
  # stored for the historical primary person.
  #
  # @param people [Array<Hash>] face rows exposing :gender
  # @param people_index [Hash] person records keyed by id
  # @param primary_person_id [Integer, String, nil] id of the historical primary person
  # @return [Array(String, String|nil)] ["consistent" | "inconsistent" | "unknown",
  #   first observed gender]
  def face_gender_consistency(people:, people_index:, primary_person_id:)
    observed = Array(people).filter_map { |row| row[:gender].to_s.downcase.presence }

    expected = nil
    if primary_person_id.present?
      # The stored id may be a String while the index is keyed by Integer ids
      # (or the reverse), so fall back to a string comparison of the keys.
      person = people_index[primary_person_id] ||
        people_index.find { |id, _record| id.to_s == primary_person_id.to_s }&.last
      expected = person&.metadata&.dig("face_attributes", "primary_gender_cue").to_s.downcase.presence
    end

    verdict =
      if expected.blank? || observed.empty?
        "unknown"
      elsif observed.include?(expected)
        "consistent"
      else
        "inconsistent"
      end

    [verdict, observed.first]
  end
  # Compares the age ranges observed on the current faces with the age range
  # stored for the historical primary person.
  #
  # @param people [Array<Hash>] face rows exposing :age_range
  # @param people_index [Hash] person records keyed by id
  # @param primary_person_id [Integer, String, nil] id of the historical primary person
  # @return [Array(String, String|nil)] ["consistent" | "inconsistent" | "unknown",
  #   first observed age range]
  def face_age_consistency(people:, people_index:, primary_person_id:)
    observed_ranges = Array(people).filter_map { |row| row[:age_range].to_s.presence }

    expected = nil
    if primary_person_id.present?
      # The stored id may be a String while the index is keyed by Integer ids
      # (or the reverse), so fall back to a string comparison of the keys.
      person = people_index[primary_person_id] ||
        people_index.find { |id, _record| id.to_s == primary_person_id.to_s }&.last
      expected = person&.metadata&.dig("face_attributes", "primary_age_range").to_s.presence
    end

    verdict =
      if expected.blank? || observed_ranges.empty?
        "unknown"
      elsif observed_ranges.include?(expected)
        "consistent"
      else
        "inconsistent"
      end

    [verdict, observed_ranges.first]
  end
  # Normalizes handles such as mentions or hashtags: trims them, lower-cases
  # them and guarantees exactly one leading +prefix+.
  #
  # @param values [Array, nil] raw handles, with or without the prefix
  # @param prefix [String] marker to enforce (the "@" / "#" style of prefix)
  # @return [Array<String>] unique prefixed handles in first-seen order
  def normalize_handle_array(values, prefix:)
    handles = Array(values).filter_map do |raw|
      text = normalize_text(raw, max: 64)
      next if text.blank?

      bare = text.delete_prefix(prefix).downcase
      "#{prefix}#{bare}" unless bare.blank?
    end

    handles.uniq
  end
  # Gathers every username candidate available for the media: explicit
  # mentions, profile handles, references taken from source links, and tokens
  # pulled out of the textual fields with the two username regexes.
  #
  # @param mentions [Array, nil] mentions, with or without a leading "@"
  # @param profile_handles [Array, nil]
  # @param ocr_text [String, nil]
  # @param transcript [String, nil]
  # @param metadata [Hash] string-keyed media metadata
  # @return [Array<String>] unique normalized usernames in first-seen order
  def detect_usernames(mentions:, profile_handles:, ocr_text:, transcript:, metadata:)
    candidates = Array(mentions).map { |mention| mention.to_s.delete_prefix("@") }
    candidates += Array(profile_handles)
    candidates += extract_source_profile_references(metadata: metadata)

    text_sources = [
      ocr_text,
      transcript,
      metadata["caption"],
      metadata["story_ref"],
      metadata["story_url"],
      metadata["permalink"]
    ]

    text_sources.each do |source|
      body = source.to_s
      next if body.blank?

      explicit = body.scan(OCR_USERNAME_REGEX).map { |captures| captures.first.to_s }
      # Bare tokens are only accepted when they look like a handle.
      bare = body.scan(BARE_USERNAME_REGEX)
        .map { |captures| captures.first.to_s }
        .select { |token| username_like_token?(token) }

      candidates.concat(explicit, bare)
    end

    candidates.map { |candidate| normalize_username(candidate) }.reject(&:blank?).uniq
  end
  # Returns the lower-cased text matched by each RESHARE_PATTERNS entry found
  # in the combined OCR text, transcript, caption and link fields.
  #
  # @param ocr_text [String, nil]
  # @param transcript [String, nil]
  # @param metadata [Hash] string-keyed media metadata
  # @return [Array<String>] unique matches; empty when there is no text at all
  def detect_reshare_indicators(ocr_text:, transcript:, metadata:)
    fields = [ocr_text, transcript, metadata["caption"], metadata["story_url"], metadata["permalink"]]
    corpus = fields.map(&:to_s).join("\n")
    return [] if corpus.blank?

    hits = RESHARE_PATTERNS
      .map { |pattern| corpus.match(pattern) }
      .compact
      .map { |found| found.to_s.downcase }

    hits.uniq
  end
  # Tells whether the story URL or permalink points at an Instagram path that
  # does not belong to the profile itself.
  #
  # A link counts as the profile's own when its path contains
  # "/<username>/" or when "<username>" (optionally after "stories/") is
  # followed by "/", "?", "#" or the end of the link. The username is compared
  # case-insensitively because links are lower-cased first.
  #
  # NOTE(review): generic paths such as "/p/<code>/" still count as third-party
  # links here; confirm whether reserved path segments should be excluded.
  #
  # @param profile_username [String, nil]
  # @param metadata [Hash] string-keyed media metadata ("story_url", "permalink")
  # @return [Boolean]
  def third_party_profile_link_detected?(profile_username:, metadata:)
    return false if profile_username.blank?

    own_handle = profile_username.to_s.downcase
    own_segment = "/#{own_handle}/"
    # Also recognise own links without a trailing slash or with a query string / fragment.
    own_link = %r{instagram\.com/(?:stories/)?#{Regexp.escape(own_handle)}(?:[/?#]|\z)}

    links = [metadata["story_url"], metadata["permalink"]].map(&:to_s).reject(&:blank?)
    links.any? do |link|
      next false unless link.include?("instagram.com/")

      normalized = link.downcase
      next false if normalized.include?(own_segment) || normalized.match?(own_link)

      normalized.match?(%r{instagram\.com/[a-z0-9._]+/?})
    end
  end
  # Collects meme hints: the lower-cased text matched by each MEME_PATTERNS
  # entry, plus "multi_line_overlay_text" when the combined text has at least
  # two non-blank lines and is at least 40 characters long.
  #
  # @param ocr_text [String, nil]
  # @param transcript [String, nil]
  # @param metadata [Hash] string-keyed media metadata ("caption")
  # @return [Array<String>] unique markers
  def detect_meme_markers(ocr_text:, transcript:, metadata:)
    corpus = [ocr_text, transcript, metadata["caption"]].map(&:to_s).join("\n")

    markers = MEME_PATTERNS
      .map { |pattern| corpus.match(pattern) }
      .compact
      .map { |found| found.to_s.downcase }

    populated_lines = corpus.each_line.count { |line| line.strip.present? }
    markers.push("multi_line_overlay_text") if populated_lines >= 2 && corpus.length >= 40

    markers.uniq
  end
  # Classifies how the media relates to the profile, first matching rule wins:
  # "owned" (profile username seen and no external references), "reshared"
  # (any reshare hit or meme marker), "third_party" (any external username or
  # source reference), otherwise "unknown".
  #
  # @param profile_username_detected [Boolean]
  # @param external_usernames [Array]
  # @param external_source_refs [Array]
  # @param reshare_hits [Array]
  # @param meme_markers [Array]
  # @return [String]
  def infer_share_status(profile_username_detected:, external_usernames:, external_source_refs:, reshare_hits:, meme_markers:)
    if profile_username_detected && external_usernames.empty? && external_source_refs.empty?
      "owned"
    elsif reshare_hits.any? || meme_markers.any?
      "reshared"
    elsif external_usernames.any? || external_source_refs.any?
      "third_party"
    else
      "unknown"
    end
  end
  # Extracts usernames referenced by the media's source fields: the story_ref
  # (trailing ":" removed) and the owner segments of the Instagram URLs in
  # story_url, permalink and media_url. First path segments listed in
  # RESERVED_IG_SEGMENTS are ignored.
  #
  # @param metadata [Hash] string-keyed media metadata
  # @return [Array<String>] unique, normalized, valid usernames
  def extract_source_profile_references(metadata:)
    candidates = []

    story_ref = metadata["story_ref"].to_s
    candidates << story_ref.delete_suffix(":") if story_ref.present?

    %w[story_url permalink media_url].each do |key|
      url = metadata[key].to_s
      next if url.blank?

      story_owner = url[%r{instagram\.com/stories/([a-zA-Z0-9._]+)/?}i, 1]
      candidates << story_owner if story_owner

      first_segment = url[%r{instagram\.com/([a-zA-Z0-9._]+)/?}i, 1]
      next unless first_segment

      first_segment = first_segment.to_s.downcase
      candidates << first_segment unless RESERVED_IG_SEGMENTS.include?(first_segment)
    end

    candidates
      .map { |candidate| normalize_username(candidate) }
      .reject(&:blank?)
      .select { |candidate| valid_instagram_username?(candidate) }
      .uniq
  end
  # Collects numeric profile ids from the metadata: purely numeric values of
  # the known id keys, followed by standalone runs of five or more digits
  # found inside story_id.
  #
  # @param metadata [Hash] string-keyed media metadata
  # @return [Array<String>] up to 10 unique id strings
  def extract_source_profile_ids(metadata:)
    id_keys = %w[source_profile_id owner_id profile_id user_id source_user_id]
    keyed_ids = id_keys.filter_map do |key|
      id = metadata[key].to_s
      id if id.match?(/\A\d+\z/)
    end

    embedded_ids = metadata["story_id"].to_s.scan(/(?<!\w)\d{5,}(?!\w)/)

    (keyed_ids + embedded_ids).map(&:to_s).reject(&:blank?).uniq.first(10)
  end
  # Adds up evidence points: 2 each for OCR text, a transcript and any
  # objects/detections; 1 each for scenes, hashtags-or-mentions and
  # recognised (primary or secondary) faces.
  #
  # @param faces [Hash] face summary exposing :primary_user_count / :secondary_person_count
  # @return [Integer] score between 0 and 9
  def score_verified_signals(ocr_text:, transcript:, objects:, object_detections:, scenes:, hashtags:, mentions:, faces:)
    weighted_signals = [
      [2, ocr_text.to_s.present?],
      [2, transcript.to_s.present?],
      [2, objects.any? || object_detections.any?],
      [1, scenes.any?],
      [1, hashtags.any? || mentions.any?],
      [1, faces[:primary_user_count].to_i.positive? || faces[:secondary_person_count].to_i.positive?]
    ]

    weighted_signals.sum { |points, earned| earned ? points : 0 }
  end
  # Confidence attached to an ownership label: a per-label base value, plus
  # 0.03 per reason code and 0.02 when the signal score is at least 4,
  # clamped to 0.5..0.98 and rounded to two decimals.
  #
  # @param label [#to_s] ownership label
  # @param reason_codes [Array]
  # @param signal_score [Numeric]
  # @return [Float]
  def ownership_confidence(label:, reason_codes:, signal_score:)
    base_by_label = {
      "owned_by_profile" => 0.62,
      "insufficient_evidence" => 0.9,
      "meme_reshare" => 0.9,
      "reshare" => 0.86,
      "third_party_content" => 0.82,
      "unrelated_post" => 0.76
    }

    confidence = base_by_label.fetch(label.to_s, 0.6)
    confidence += 0.03 * reason_codes.size
    confidence += 0.02 if signal_score >= 4
    confidence.clamp(0.5, 0.98).round(2)
  end
  # Human-readable one-line explanation for an ownership label.
  #
  # @param label [#to_s] ownership label
  # @param external_usernames [Array<String>]
  # @param external_source_refs [Array<String>]
  # @param reshare_hits [Array<String>]
  # @param meme_markers [Array<String>]
  # @param primary_faces [Integer]
  # @param secondary_faces [Integer]
  # @param signal_score [Integer]
  # @return [String]
  def ownership_summary(label:, external_usernames:, external_source_refs:, reshare_hits:, meme_markers:, primary_faces:, secondary_faces:, signal_score:)
    # Wraps non-blank detail text in " (...)"; blank detail yields no suffix.
    detail_suffix = ->(detail) { detail.present? ? " (#{detail})" : "" }

    case label.to_s
    when "owned_by_profile"
      "Validated as likely profile-owned content (signal score #{signal_score})."
    when "insufficient_evidence"
      "Insufficient verified context (signal score #{signal_score}) to generate a grounded comment."
    when "meme_reshare"
      hints = (meme_markers.first(2) + reshare_hits.first(2)).uniq.join(", ")
      "Likely meme/reshared content#{detail_suffix.call(hints)}; excluded from comment generation."
    when "reshare"
      "Likely reshare/credited content#{detail_suffix.call(reshare_hits.first(3).join(", "))}; skipping full comment."
    when "third_party_content"
      usernames = external_usernames.first(3).join(", ")
      refs = external_source_refs.first(3).join(", ")
      details = []
      details << "account references #{usernames}" if usernames.present?
      details << "source refs #{refs}" if refs.present?
      "Detected third-party content#{detail_suffix.call(details.join('; '))} with non-primary ownership signals."
    when "unrelated_post"
      "Detected non-primary face signals (primary=#{primary_faces}, secondary=#{secondary_faces}); post may be unrelated."
    else
      "Ownership could not be validated."
    end
  end
  # Tokenizes the profile bio into lower-case topic words: runs of a-z, 0-9
  # and "_" that are at least three characters long.
  #
  # @return [Array<String>] up to 30 unique tokens; empty when there is no
  #   profile, no bio accessor or a blank bio
  def extract_profile_bio_topics
    bio = ""
    bio = @profile.bio.to_s.downcase if @profile&.respond_to?(:bio)
    return [] if bio.blank?

    bio.scan(/[a-z0-9_]{3,}/).uniq.take(30)
  end
  # Collapses whitespace runs to single spaces, trims the text and limits it
  # to +max+ characters.
  #
  # Truncation is done by character, matching the character-based length
  # check; cutting by byte could split a multibyte character and return an
  # invalidly encoded string.
  #
  # @param value [#to_s] raw text
  # @param max [Integer] maximum number of characters to keep
  # @return [String, nil] normalized text, or nil when nothing is left
  def normalize_text(value, max:)
    text = value.to_s.gsub(/\s+/, " ").strip
    return nil if text.blank?

    text.length <= max ? text : text[0, max]
  end
  # Canonical username form: lower-cased, trimmed, without one leading "@".
  #
  # @param value [#to_s]
  # @return [String] empty string for nil
  def normalize_username(value)
    handle = value.to_s.downcase.strip
    handle.start_with?("@") ? handle[1..] : handle
  end
  # A bare token only counts as a username when it is a valid Instagram
  # username and contains "_" or ".".
  #
  # @param token [#to_s]
  # @return [Boolean]
  def username_like_token?(token)
    candidate = token.to_s
    valid_instagram_username?(candidate) && (candidate.include?("_") || candidate.include?("."))
  end
  # Checks whether a value is shaped like an Instagram username: 3-30
  # characters from a-z, 0-9, "." and "_" (after lower-casing and trimming),
  # not a domain fragment and not listed in RESERVED_IG_SEGMENTS.
  #
  # @param value [#to_s]
  # @return [Boolean]
  def valid_instagram_username?(value)
    candidate = value.to_s.downcase.strip

    (3..30).cover?(candidate.length) &&
      candidate.match?(/\A[a-z0-9._]+\z/) &&
      !candidate.include?("instagram.com") &&
      !candidate.start_with?("www.") &&
      !RESERVED_IG_SEGMENTS.include?(candidate)
  end
  609. end
  610. end

app/services/face_detection_service.rb

0.0% lines covered

100.0% branches covered

321 relevant lines. 0 lines covered and 321 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Runs face detection and OCR for one image through a local AI client and
  # reshapes the response into a symbol-keyed result hash (faces, OCR text and
  # blocks, labels, scenes, mentions, hashtags, handles and metadata).
  class FaceDetectionService
    DEFAULT_MIN_FACE_CONFIDENCE = ENV.fetch("FACE_DETECTION_MIN_CONFIDENCE", "0.25").to_f
    FACE_DUPLICATE_IOU_THRESHOLD = ENV.fetch("FACE_DETECTION_DUPLICATE_IOU_THRESHOLD", "0.85").to_f

    # @param client [#detect_faces_and_ocr!, nil] detection client; a local
    #   microservice client is built when omitted
    # @param min_face_confidence [Numeric, nil] minimum confidence for a face to
    #   be kept; negative values become 0.0, nil uses the default
    def initialize(client: nil, min_face_confidence: nil)
      @client = client || build_local_client
      @min_face_confidence = begin
        value = min_face_confidence.nil? ? DEFAULT_MIN_FACE_CONFIDENCE : min_face_confidence.to_f
        value.negative? ? 0.0 : value
      rescue StandardError
        DEFAULT_MIN_FACE_CONFIDENCE
      end
    end

    # Detects faces and text in the given image.
    #
    # Never raises: missing bytes, a missing client or any error while calling
    # the client or parsing its response yields an empty result whose metadata
    # carries the reason.
    #
    # @param media_payload [Hash] expects :image_bytes and optionally :story_id
    # @return [Hash] normalized detection result
    def detect(media_payload:)
      bytes = media_payload[:image_bytes]
      return empty_result(reason: "image_bytes_missing") if bytes.blank?
      return empty_result(reason: "local_client_unavailable") unless @client

      response = @client.detect_faces_and_ocr!(
        image_bytes: bytes,
        usage_context: {
          workflow: "story_processing",
          story_id: media_payload[:story_id].to_s
        }
      )
      parse_response(response)
    rescue StandardError => e
      empty_result(reason: "vision_error", error_message: e.message)
    end

    private

    # Best effort: returns nil when the client cannot be constructed.
    def build_local_client
      Ai::LocalMicroserviceClient.new
    rescue StandardError
      nil
    end

    # Normalizes a raw client response. Values are read both from the top level
    # and from the nested "results" hash.
    def parse_response(response)
      payload = deep_stringify(response.is_a?(Hash) ? response : {})
      nested = payload["results"].is_a?(Hash) ? payload["results"] : {}

      # The same text can arrive through several keys; identical chunks are
      # merged so the OCR text is not repeated.
      text = [
        payload["ocr_text"].to_s,
        joined_text_rows(payload["text"]),
        joined_text_rows(nested["text"])
      ].map(&:strip).reject(&:blank?).uniq.join("\n").presence

      ocr_blocks = normalize_ocr_blocks(payload: payload, nested: nested)
      location_tags = (Array(payload["location_tags"]) + Array(nested["location_tags"]))
        .map(&:to_s).map(&:strip).reject(&:blank?).uniq
      content_labels = (
        Array(payload["content_labels"]) +
        Array(nested["content_labels"]) +
        label_values(payload["labels"]) +
        label_values(nested["labels"])
      ).map { |value| value.to_s.downcase.strip }.reject(&:blank?).uniq
      object_detections = normalize_object_detections(payload: payload, nested: nested)

      # Rows are already string-keyed here because the payload was deep-stringified.
      scenes = (Array(payload["scenes"]) + Array(nested["scenes"])).filter_map do |row|
        next unless row.is_a?(Hash)

        {
          timestamp: row["timestamp"],
          type: row["type"].to_s.presence || "scene_change",
          correlation: row["correlation"]
        }.compact
      end.first(80)

      mentions = (
        Array(payload["mentions"]) +
        Array(nested["mentions"]) +
        text.to_s.scan(/@[a-zA-Z0-9._]+/)
      ).map(&:to_s).map(&:downcase).uniq

      profile_handles = (
        Array(payload["profile_handles"]) +
        Array(nested["profile_handles"]) +
        text.to_s.scan(/\b[a-zA-Z0-9._]{3,30}\b/)
      ).map(&:to_s)
        .map(&:downcase)
        .select { |token| token.include?("_") || token.include?(".") }
        .reject { |token| token.include?("instagram.com") }
        .uniq

      hashtags = (
        Array(payload["hashtags"]) +
        Array(nested["hashtags"]) +
        text.to_s.scan(/#[a-zA-Z0-9_]+/)
      ).map(&:to_s).map(&:downcase).uniq

      raw_faces = (
        Array(payload["faces"]) +
        Array(nested["faces"]) +
        Array(payload["faceAnnotations"]) +
        Array(nested["faceAnnotations"])
      )
      normalized_faces = raw_faces.map { |face| normalize_face(face) }
      filtered_faces = normalized_faces.select { |face| keep_face?(face) }
      faces = deduplicate_faces(filtered_faces)

      warnings = Array(payload.dig("metadata", "warnings")) + Array(nested.dig("metadata", "warnings"))
      metadata_reason = payload.dig("metadata", "reason").to_s.presence || nested.dig("metadata", "reason").to_s.presence

      {
        faces: faces,
        ocr_text: text.presence,
        ocr_blocks: ocr_blocks,
        location_tags: location_tags.first(20),
        content_signals: content_labels.first(30),
        object_detections: object_detections.first(60),
        scenes: scenes,
        mentions: mentions.first(30),
        hashtags: hashtags.first(30),
        profile_handles: profile_handles.first(30),
        metadata: {
          source: payload.dig("metadata", "source").to_s.presence || nested.dig("metadata", "source").to_s.presence || "local_ai",
          face_count: faces.length,
          detected_face_count: raw_faces.length,
          filtered_face_count: filtered_faces.length,
          dropped_face_count: [raw_faces.length - faces.length, 0].max,
          min_face_confidence: @min_face_confidence,
          reason: metadata_reason,
          warnings: warnings.first(20)
        }.compact
      }
    end

    # Joins the distinct non-blank texts of OCR rows (hashes with "text" or
    # plain values) with newlines.
    def joined_text_rows(rows)
      Array(rows)
        .map { |row| row.is_a?(Hash) ? row["text"].to_s : row.to_s }
        .map(&:strip)
        .reject(&:blank?)
        .uniq
        .join("\n")
    end

    # Extracts the label (or description) from label rows; plain values pass through.
    def label_values(rows)
      Array(rows).map { |row| row.is_a?(Hash) ? (row["label"] || row["description"]) : row }
    end

    # Normalizes one raw face row into the service's face hash.
    def normalize_face(face)
      raw = deep_stringify(face.is_a?(Hash) ? face : {})
      bbox = raw.dig("bounding_box") || raw.dig("bbox") || raw.dig("boundingPoly", "vertices")
      age_value = raw["age"].to_f
      gender_value = raw["gender"].to_s.strip.downcase
      gender_value = nil if gender_value.blank?

      landmarks = Array(raw["landmarks"] || []).first(12).filter_map do |item|
        row = deep_stringify(item)
        next unless row.is_a?(Hash)

        {
          type: (row["type"] || row["name"] || "UNKNOWN").to_s,
          x: row["x"] || row.dig("position", "x"),
          y: row["y"] || row.dig("position", "y"),
          z: row["z"] || row.dig("position", "z")
        }
      end

      {
        confidence: (raw["confidence"] || raw["score"] || 0).to_f,
        bounding_box: normalize_bounding_box(bbox),
        landmarks: landmarks,
        likelihoods: raw["likelihoods"] || {},
        age: age_value.positive? ? age_value.round(1) : nil,
        age_range: age_value.positive? ? age_range_for(age_value) : nil,
        gender: gender_value,
        gender_score: raw["gender_score"].to_f
      }
    end

    # Maps an age (truncated to an integer) onto a coarse age bucket.
    def age_range_for(age_value)
      age = age_value.to_i
      return "child" if age < 13
      return "teen" if age < 20
      return "young_adult" if age < 30
      return "adult" if age < 45
      return "middle_aged" if age < 60

      "senior"
    end

    # Converts the supported bounding-box shapes into {"x1","y1","x2","y2"}.
    # Unsupported or malformed shapes yield {} so that only the affected row
    # is rejected later, instead of an exception aborting the whole detection.
    def normalize_bounding_box(value)
      case value
      when Hash then bounding_box_from_hash(deep_stringify(value))
      when Array then bounding_box_from_array(value)
      else {}
      end
    end

    # Handles corner form (x1/y1/x2/y2) and origin + size form (x/y/width/height).
    def bounding_box_from_hash(row)
      if %w[x1 y1 x2 y2].all? { |key| row.key?(key) }
        { "x1" => row["x1"].to_f, "y1" => row["y1"].to_f, "x2" => row["x2"].to_f, "y2" => row["y2"].to_f }
      elsif %w[x y width height].all? { |key| row.key?(key) }
        x = row["x"].to_f
        y = row["y"].to_f
        { "x1" => x, "y1" => y, "x2" => x + row["width"].to_f, "y2" => y + row["height"].to_f }
      else
        {}
      end
    end

    # Handles four-element arrays: [x1, y1, x2, y2] numbers, four {"x","y"}
    # vertex hashes, or four [x, y] pairs.
    def bounding_box_from_array(value)
      return {} unless value.length == 4

      if value.first.is_a?(Numeric)
        # Numeric values are passed through untouched (no float coercion).
        { "x1" => value[0], "y1" => value[1], "x2" => value[2], "y2" => value[3] }
      elsif value.all?(Hash)
        bounding_box_from_points(value.map { |point| [point["x"], point["y"]] })
      elsif value.all?(Array)
        bounding_box_from_points(value.map { |point| [point[0], point[1]] })
      else
        {}
      end
    end

    # Smallest axis-aligned box containing all [x, y] points.
    def bounding_box_from_points(points)
      xs = points.map { |point| point[0].to_f }
      ys = points.map { |point| point[1].to_f }
      { "x1" => xs.min, "y1" => ys.min, "x2" => xs.max, "y2" => ys.max }
    end

    # Result returned when detection could not run or failed.
    def empty_result(reason:, error_message: nil)
      {
        faces: [],
        ocr_text: nil,
        ocr_blocks: [],
        location_tags: [],
        content_signals: [],
        object_detections: [],
        scenes: [],
        mentions: [],
        hashtags: [],
        profile_handles: [],
        metadata: {
          source: "local_ai",
          reason: reason,
          error_message: error_message.to_s.presence
        }.compact
      }
    end

    # Builds OCR blocks from payload["ocr_blocks"]; when there are none, falls
    # back to the "text" rows of the payload and of the nested results.
    def normalize_ocr_blocks(payload:, nested:)
      blocks = Array(payload["ocr_blocks"]).filter_map do |row|
        ocr_block_from_row(row) if row.is_a?(Hash)
      end

      if blocks.empty?
        (Array(payload["text"]) + Array(nested["text"])).each do |row|
          block =
            if row.is_a?(Hash)
              ocr_block_from_row(row)
            else
              text = row.to_s.strip
              text.blank? ? nil : { text: text, confidence: 0.0, bbox: {}, source: "ocr" }
            end
          blocks << block if block
        end
      end

      blocks.first(80)
    end

    # Converts a string-keyed OCR row into a block; nil when it has no text.
    def ocr_block_from_row(row)
      text = row["text"].to_s.strip
      return nil if text.blank?

      {
        text: text,
        confidence: row["confidence"].to_f,
        bbox: normalize_bounding_box(row["bbox"]),
        timestamp: row["timestamp"],
        source: row["source"].to_s.presence || "ocr"
      }.compact
    end

    # Uses payload["object_detections"], falling back to the payload labels and
    # then to the nested labels.
    def normalize_object_detections(payload:, nested:)
      rows = Array(payload["object_detections"])
      rows = Array(payload["labels"]) if rows.empty?
      rows = Array(nested["labels"]) if rows.empty?

      rows.filter_map do |row|
        entry = deep_stringify(row)
        detailed = entry.is_a?(Hash)
        label = detailed ? (entry["label"] || entry["description"]).to_s.strip : entry.to_s.strip
        next if label.blank?

        {
          label: label.downcase,
          confidence: detailed ? (entry["confidence"] || entry["score"] || entry["max_confidence"]).to_f : 0.0,
          bbox: detailed ? normalize_bounding_box(entry["bbox"]) : {},
          timestamps: detailed ? Array(entry["timestamps"]).map(&:to_f).first(80) : []
        }.compact
      end.first(80)
    end

    # A face is kept when it has a valid box and a positive confidence that
    # reaches the configured minimum.
    def keep_face?(face)
      return false unless face.is_a?(Hash)
      return false unless valid_bounding_box?(face[:bounding_box])

      confidence = face[:confidence].to_f
      confidence.positive? && confidence >= @min_face_confidence
    end

    # A box is valid when it is non-empty and has positive width and height.
    def valid_bounding_box?(bbox)
      row = bbox.is_a?(Hash) ? bbox : {}
      return false if row.empty?

      row["x2"].to_f > row["x1"].to_f && row["y2"].to_f > row["y1"].to_f
    end

    # Keeps the strongest face of every group of heavily overlapping boxes.
    # Faces are visited by descending confidence, then descending area.
    def deduplicate_faces(faces)
      ranked = Array(faces).sort_by { |face| [-face[:confidence].to_f, -bounding_box_area(face[:bounding_box])] }

      ranked.each_with_object([]) do |face, accepted|
        duplicate = accepted.any? do |existing|
          bounding_box_iou(existing[:bounding_box], face[:bounding_box]) >= FACE_DUPLICATE_IOU_THRESHOLD
        end
        accepted << face unless duplicate
      end
    end

    # Area of a box; 0.0 for empty or degenerate boxes.
    def bounding_box_area(bbox)
      row = bbox.is_a?(Hash) ? bbox : {}
      return 0.0 if row.empty?

      width = row["x2"].to_f - row["x1"].to_f
      height = row["y2"].to_f - row["y1"].to_f
      return 0.0 unless width.positive? && height.positive?

      width * height
    end

    # Intersection-over-union of two boxes; 0.0 when they do not overlap.
    def bounding_box_iou(left_bbox, right_bbox)
      left = left_bbox.is_a?(Hash) ? left_bbox : {}
      right = right_bbox.is_a?(Hash) ? right_bbox : {}
      return 0.0 if left.empty? || right.empty?

      inter_width = [left["x2"].to_f, right["x2"].to_f].min - [left["x1"].to_f, right["x1"].to_f].max
      inter_height = [left["y2"].to_f, right["y2"].to_f].min - [left["y1"].to_f, right["y1"].to_f].max
      return 0.0 unless inter_width.positive? && inter_height.positive?

      intersection = inter_width * inter_height
      union = bounding_box_area(left) + bounding_box_area(right) - intersection
      return 0.0 unless union.positive?

      intersection / union
    end

    # Recursively converts hash keys to strings; other values are returned as-is.
    def deep_stringify(value)
      case value
      when Hash
        value.each_with_object({}) do |(key, child), out|
          out[key.to_s] = deep_stringify(child)
        end
      when Array
        value.map { |child| deep_stringify(child) }
      else
        value
      end
    end
  end

app/services/face_embedding_service.rb

0.0% lines covered

100.0% branches covered

77 relevant lines. 0 lines covered and 77 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "base64"
  2. require "digest"
  3. require "json"
  4. require "net/http"
  # Produces a unit-length embedding vector for a detected face.
  #
  # When a service URL is configured the vector is requested from that
  # service; if that yields nothing, a deterministic pseudo-embedding derived
  # from the story id, bounding box and image bytes is used instead.
  class FaceEmbeddingService
    DEFAULT_DIMENSION = 512
    REQUEST_TIMEOUT_SECONDS = 8

    # @param service_url [String, nil] embedding endpoint; blank disables the remote call
    # @param dimension [Integer] length of the deterministic vector; non-positive
    #   values fall back to DEFAULT_DIMENSION
    def initialize(service_url: ENV["FACE_EMBEDDING_SERVICE_URL"], dimension: DEFAULT_DIMENSION)
      @service_url = service_url.to_s.strip
      requested = dimension.to_i
      @dimension = requested.positive? ? requested : DEFAULT_DIMENSION
    end

    # @param media_payload [Hash] expects :image_bytes and :story_id
    # @param face [Hash] expects :bounding_box
    # @return [Hash] :vector (normalized floats) and :version
    #   ("external_service_v1" or "deterministic_v1")
    def embed(media_payload:, face:)
      external = @service_url.present? ? fetch_external_embedding(media_payload: media_payload, face: face) : nil

      if external.present?
        { vector: normalize(external), version: "external_service_v1" }
      else
        fallback = deterministic_embedding(media_payload: media_payload, face: face)
        { vector: normalize(fallback), version: "deterministic_v1" }
      end
    end

    private

    # POSTs the image and bounding box as JSON and returns the "embedding"
    # array as floats. Any failure (transport, non-2xx, bad JSON, missing or
    # empty embedding) yields nil.
    def fetch_external_embedding(media_payload:, face:)
      endpoint = URI.parse(@service_url)

      request = Net::HTTP::Post.new(endpoint.request_uri)
      request["Content-Type"] = "application/json"
      request["Accept"] = "application/json"
      request.body = JSON.generate(
        image_base64: Base64.strict_encode64(media_payload[:image_bytes].to_s),
        bounding_box: face[:bounding_box],
        story_id: media_payload[:story_id].to_s
      )

      connection = Net::HTTP.new(endpoint.host, endpoint.port).tap do |http|
        http.use_ssl = (endpoint.scheme == "https")
        http.open_timeout = REQUEST_TIMEOUT_SECONDS
        http.read_timeout = REQUEST_TIMEOUT_SECONDS
      end

      response = connection.request(request)
      return nil unless response.is_a?(Net::HTTPSuccess)

      embedding = JSON.parse(response.body.to_s)["embedding"]
      return nil unless embedding.is_a?(Array) && embedding.any?

      embedding.map(&:to_f)
    rescue StandardError
      nil
    end

    # Expands SHA-256 digests of a seed (story id, bounding-box JSON and a hash
    # of the first 8192 image bytes) into @dimension values in -1.0..1.0.
    def deterministic_embedding(media_payload:, face:)
      seed = [
        media_payload[:story_id].to_s,
        face[:bounding_box].to_h.to_json,
        Digest::SHA256.hexdigest(media_payload[:image_bytes].to_s.byteslice(0, 8192))
      ].join(":")

      values = []
      round = 0
      while values.length < @dimension
        digest_bytes = Digest::SHA256.digest("#{seed}:#{round}").bytes
        values.concat(digest_bytes.map { |byte| (byte.to_f / 127.5) - 1.0 })
        round += 1
      end

      values.first(@dimension)
    end

    # Scales the vector to unit length, rounding components to 8 decimals.
    # Empty input yields []; a zero vector is returned unscaled.
    def normalize(vector)
      components = Array(vector).map(&:to_f)
      return [] if components.empty?

      magnitude = Math.sqrt(components.sum { |component| component * component })
      return components if magnitude <= 0.0

      components.map { |component| (component / magnitude).round(8) }
    end
  end

app/services/face_identity_resolution_service.rb

0.0% lines covered

100.0% branches covered

526 relevant lines. 0 lines covered and 526 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. class FaceIdentityResolutionService
  # Thresholds for primary-identity and collaborator decisions. They are
  # presumably consumed by the promotion / collaborator logic further down the
  # class (not visible in this excerpt) - confirm before tuning.
  MIN_PRIMARY_APPEARANCES = 3
  MIN_PRIMARY_RATIO = 0.60
  FREQUENT_COLLABORATOR_CO_APPEARANCES = 3
  VERY_FREQUENT_COLLABORATOR_CO_APPEARANCES = 6

  # Tokens that look like Instagram URL path or host fragments rather than
  # account names.
  RESERVED_USERNAMES = %w[instagram stories p reel reels tv explore accounts direct www com].freeze
  # Resolves face identities for a persisted post.
  #
  # @param post [Object, nil] record responding to persisted?, instagram_profile
  #   and instagram_account
  # @param extracted_usernames [Array]
  # @param content_summary [Hash]
  # @return [Hash] { skipped: true, reason: ... } when the post is missing,
  #   unsaved or has no profile; otherwise the result of resolve_for_source!
  def resolve_for_post!(post:, extracted_usernames: [], content_summary: {})
    if !post&.persisted?
      { skipped: true, reason: "post_missing" }
    elsif !post.instagram_profile
      { skipped: true, reason: "profile_missing" }
    else
      resolve_for_source!(
        source: post,
        source_type: "post",
        profile: post.instagram_profile,
        account: post.instagram_account,
        extracted_usernames: extracted_usernames,
        content_summary: content_summary
      )
    end
  end
  # Resolves face identities for a persisted story.
  #
  # @param story [Object, nil] record responding to persisted?,
  #   instagram_profile and instagram_account
  # @param extracted_usernames [Array]
  # @param content_summary [Hash]
  # @return [Hash] { skipped: true, reason: ... } when the story is missing,
  #   unsaved or has no profile; otherwise the result of resolve_for_source!
  def resolve_for_story!(story:, extracted_usernames: [], content_summary: {})
    if !story&.persisted?
      { skipped: true, reason: "story_missing" }
    elsif !story.instagram_profile
      { skipped: true, reason: "profile_missing" }
    else
      resolve_for_source!(
        source: story,
        source_type: "story",
        profile: story.instagram_profile,
        account: story.instagram_account,
        extracted_usernames: extracted_usernames,
        content_summary: content_summary
      )
    end
  end
  43. private
  # Runs the full identity-resolution pipeline for one post or story.
  #
  # Steps, in order: load faces, gather candidate usernames, link usernames to
  # people, recompute profile-wide face stats, promote the primary identity,
  # reload participants, sync face roles, rebuild collaborator relationships,
  # then persist the summary on the profile and on the source.
  #
  # Returns `{ skipped: false, summary: Hash }` on success. Any StandardError
  # is converted into `{ skipped: true, reason: "face_identity_resolution_error",
  # error: message }` instead of propagating.
  def resolve_for_source!(source:, source_type:, profile:, account:, extracted_usernames:, content_summary:)
    detected_faces = source_faces(source: source, source_type: source_type)
    handles = collect_usernames(
      profile: profile,
      source: source,
      extracted_usernames: extracted_usernames,
      content_summary: content_summary
    )
    people_rows, anonymous_faces = build_participants(detected_faces)
    apply_username_links!(participants: people_rows, usernames: handles, profile: profile)

    face_stats = profile_face_stats(profile: profile)
    primary = promote_primary_identity!(profile: profile, stats: face_stats)

    # Promotion may have changed roles/labels, so reload before linking again.
    people_rows = refresh_participants_with_latest_people(participants: people_rows, profile: profile)
    apply_username_links!(participants: people_rows, usernames: handles, profile: profile)
    sync_source_face_roles!(source: source, source_type: source_type)

    collaborators = build_collaborator_index(profile: profile, primary_person_id: primary[:person_id])
    update_collaborator_relationships!(profile: profile, collaborator_index: collaborators)
    handle_matches = map_usernames_to_people(profile: profile, usernames: handles)

    people_rows = enrich_participants(
      participants: people_rows,
      stats: face_stats,
      collaborator_index: collaborators
    )
    # The :person record is dropped from each row before it is persisted.
    payload = people_rows.map { |row| row.except(:person) }
    narrative = build_summary_text(
      profile: profile,
      participants: payload,
      primary_identity: primary,
      usernames: handles,
      unknown_face_count: anonymous_faces
    )

    summary = {
      source_type: source_type,
      source_id: source.id,
      extracted_usernames: handles,
      unknown_face_count: anonymous_faces,
      participants: payload,
      primary_identity: primary,
      username_face_matches: handle_matches,
      participant_summary_text: narrative,
      resolved_at: Time.current.iso8601
    }

    persist_profile_face_identity!(
      profile: profile,
      primary_identity: primary,
      collaborator_index: collaborators,
      username_matches: handle_matches
    )
    persist_source_summary!(source: source, source_type: source_type, summary: summary)

    { skipped: false, summary: summary }
  rescue StandardError => error
    { skipped: true, reason: "face_identity_resolution_error", error: error.message.to_s }
  end
  # Loads the face records of a source with their linked person preloaded.
  # Returns an empty array for any source_type other than "post" or "story".
  def source_faces(source:, source_type:)
    face_scope =
      if source_type == "post"
        source.instagram_post_faces
      elsif source_type == "story"
        source.instagram_story_faces
      end
    return [] unless face_scope

    face_scope.includes(:instagram_story_person).to_a
  end
  # Gathers every candidate username for a source, from: the explicit
  # extracted_usernames, the content summary (symbol or string keys), the
  # source's stored metadata (string keys), and finally the profile's own
  # username. Returns at most 40 normalized, de-duplicated handles.
  def collect_usernames(profile:, source:, extracted_usernames:, content_summary:)
    summary = content_summary.is_a?(Hash) ? content_summary : {}
    stored = source.metadata.is_a?(Hash) ? source.metadata : {}

    candidates = [
      *Array(extracted_usernames),
      *Array(summary[:mentions] || summary["mentions"]),
      *Array(summary[:profile_handles] || summary["profile_handles"]),
      *extract_usernames_from_text(summary[:ocr_text] || summary["ocr_text"]),
      *Array(stored["mentions"]),
      *Array(stored["profile_handles"]),
      *extract_usernames_from_text(stored["ocr_text"]),
      *extract_usernames_from_url(stored["story_url"]),
      *extract_usernames_from_url(stored["permalink"])
    ]

    story_ref = stored["story_ref"].to_s
    candidates << story_ref.delete_suffix(":") if story_ref.present?

    owner_handle = normalize_username(profile.username)
    candidates << owner_handle if owner_handle.present?

    candidates
      .filter_map { |raw| normalize_username(raw).presence }
      .uniq
      .first(40)
  end
  # Pulls username candidates out of free text: every "@handle" mention first,
  # followed by bare tokens that username_like_token? accepts. Duplicates are
  # kept; callers de-duplicate.
  def extract_usernames_from_text(text)
    body = text.to_s
    return [] if body.blank?

    mentioned = body.scan(/@([a-zA-Z0-9._]{2,30})/).map(&:first)
    bare = body
      .scan(/\b([a-zA-Z0-9._]{3,30})\b/)
      .map { |captures| captures.first.to_s }
      .select { |candidate| username_like_token?(candidate) }

    mentioned + bare
  end
  # Extracts username candidates from an Instagram URL: the owner segment of a
  # /stories/<user>/ URL, plus the first path segment unless it is a reserved
  # word (case-insensitive).
  def extract_usernames_from_url(url)
    link = url.to_s
    return [] if link.blank?

    found = []

    story_owner = link[%r{instagram\.com/stories/([a-zA-Z0-9._]+)/?}i, 1]
    found << story_owner if story_owner

    first_segment = link[%r{instagram\.com/([a-zA-Z0-9._]+)/?}i, 1]
    if first_segment && !RESERVED_USERNAMES.include?(first_segment.downcase)
      found << first_segment
    end

    found
  end
  # Splits faces into identified participants and anonymous ones.
  #
  # Returns `[rows, unknown_count]`: one hash per face that has a linked
  # person (with the person record under :person), and the number of faces
  # that have none.
  def build_participants(faces)
    identified, anonymous = Array(faces).partition { |face| face.instagram_story_person }

    rows = identified.map do |face|
      who = face.instagram_story_person
      {
        person: who,
        person_id: who.id,
        role: who.role.to_s,
        label: who.label.to_s.presence,
        match_similarity: face.match_similarity.to_f,
        detector_confidence: face.detector_confidence.to_f,
        linked_usernames: linked_usernames(who),
        real_person_status: who.real_person_status,
        identity_confidence: who.identity_confidence,
        merged_into_person_id: who.merged_into_person_id
      }
    end

    [ rows, anonymous.length ]
  end
  # Attaches observed usernames to the people seen in this source.
  #
  # The profile's own username goes to the "primary_user" participant. Every
  # other username goes to the participant already known under that alias, or,
  # failing that, to the sole non-primary participant when exactly one exists.
  def apply_username_links!(participants:, usernames:, profile:)
    return if participants.empty? || usernames.empty?

    owner_handle = normalize_username(profile.username)
    owner_row = participants.find { |row| row[:role] == "primary_user" }
    update_person_usernames!(owner_row[:person], [ owner_handle ]) if owner_row && owner_handle.present?

    outside_handles = usernames.reject { |handle| handle == owner_handle }
    return if outside_handles.empty?

    rows_by_person = participants.index_by { |row| row[:person_id] }
    known_aliases = username_alias_index(profile: profile)
    others = participants.reject { |row| row[:role] == "primary_user" }

    outside_handles.each do |handle|
      aliased_id = known_aliases[handle]
      aliased_row = aliased_id && rows_by_person[aliased_id]

      if aliased_row
        update_person_usernames!(aliased_row[:person], [ handle ])
      elsif others.length == 1
        # Unambiguous: only one non-primary face could own this handle.
        update_person_usernames!(others.first[:person], [ handle ])
      end
    end
  end
  # Merges usernames into a person's metadata["linked_usernames"] (capped at
  # 30) and bumps a per-username observation counter. Writes with
  # update_columns, and only when the linked list actually changes.
  def update_person_usernames!(person, usernames)
    incoming = Array(usernames).filter_map { |raw| normalize_username(raw).presence }.uniq
    return if incoming.empty?

    meta = person.metadata.is_a?(Hash) ? person.metadata.deep_dup : {}
    existing = Array(meta["linked_usernames"]).filter_map { |raw| normalize_username(raw).presence }
    merged = (existing | incoming).first(30)
    return if merged == existing

    stored_tallies = meta["username_observations"]
    tallies = stored_tallies.is_a?(Hash) ? stored_tallies.deep_dup : {}
    incoming.each { |handle| tallies[handle] = tallies[handle].to_i + 1 }

    meta.merge!(
      "linked_usernames" => merged,
      "username_observations" => tallies,
      "last_username_linked_at" => Time.current.iso8601
    )
    person.update_columns(metadata: meta, updated_at: Time.current)
  end
  # Counts identified faces per person across all of the profile's stories
  # and posts.
  #
  # Returns a hash with :person_counts (person id => appearances),
  # :total_faces (sum of all counts) and :people_by_id (person records for
  # the counted ids, keyed by id).
  def profile_face_stats(profile:)
    per_story = InstagramStoryFace
      .joins(:instagram_story)
      .where(instagram_stories: { instagram_profile_id: profile.id })
      .where.not(instagram_story_person_id: nil)
      .group(:instagram_story_person_id)
      .count

    per_post = InstagramPostFace
      .joins(:instagram_profile_post)
      .where(instagram_profile_posts: { instagram_profile_id: profile.id })
      .where.not(instagram_story_person_id: nil)
      .group(:instagram_story_person_id)
      .count

    tallies = per_story.merge(per_post) { |_person_id, in_stories, in_posts| in_stories.to_i + in_posts.to_i }
    counted_people = profile.instagram_story_people.where(id: tallies.keys).index_by(&:id)

    { person_counts: tallies, total_faces: tallies.values.sum, people_by_id: counted_people }
  end
  # Decides who the profile's primary identity is.
  #
  # The most frequently seen person is "confirmed" when they meet both
  # MIN_PRIMARY_APPEARANCES and MIN_PRIMARY_RATIO. On confirmation, any other
  # "primary_user" of the profile is demoted to "secondary_person" and the
  # leader is promoted. Without confirmation, the existing primary user (if
  # any) is reported, otherwise the leader as an unconfirmed candidate.
  #
  # Returns the primary-identity hash, or empty_primary_identity when there
  # are no counted faces or the leader's record is missing.
  def promote_primary_identity!(profile:, stats:)
    tallies = stats[:person_counts]
    face_total = stats[:total_faces].to_i
    return empty_primary_identity if tallies.empty? || face_total <= 0

    leader_id, leader_hits = tallies.max_by { |_person_id, hits| hits.to_i }
    leader = stats[:people_by_id][leader_id]
    return empty_primary_identity unless leader

    share = leader_hits.to_f / face_total.to_f
    is_confirmed = leader_hits.to_i >= MIN_PRIMARY_APPEARANCES && share >= MIN_PRIMARY_RATIO
    current_primary = profile.instagram_story_people.find_by(role: "primary_user")

    if is_confirmed
      InstagramStoryPerson
        .where(instagram_profile_id: profile.id, role: "primary_user")
        .where.not(id: leader.id)
        .update_all(role: "secondary_person", updated_at: Time.current)

      leader_meta = leader.metadata.is_a?(Hash) ? leader.metadata.deep_dup : {}
      leader_meta["primary_identity"] = {
        "confirmed" => true,
        "dominance_ratio" => share.round(4),
        "appearance_count" => leader_hits.to_i,
        "updated_at" => Time.current.iso8601
      }
      leader.update!(
        role: "primary_user",
        label: leader.label.to_s.presence || profile.username.to_s,
        metadata: leader_meta
      )
      leader.sync_identity_confidence!
      current_primary = leader
    end

    chosen = current_primary || leader
    chosen_hits = tallies[chosen.id]

    {
      person_id: chosen.id,
      confirmed: is_confirmed,
      role: chosen.role,
      label: chosen.label.to_s.presence,
      appearance_count: chosen_hits.to_i,
      total_faces: face_total,
      dominance_ratio: (chosen_hits.to_f / face_total.to_f).round(4),
      linked_usernames: linked_usernames(chosen),
      bio_context: bio_context_tokens(profile: profile)
    }
  end
  # Counts, per person, how many stories and posts of this profile they share
  # with the primary person.
  #
  # Returns `{ person_id => { co_appearances_with_primary:, relationship: } }`,
  # or an empty hash when there is no primary person.
  def build_collaborator_index(profile:, primary_person_id:)
    return {} if primary_person_id.blank?

    story_pairs = InstagramStoryFace
      .joins(:instagram_story)
      .where(instagram_stories: { instagram_profile_id: profile.id })
      .where.not(instagram_story_person_id: nil)
      .pluck(:instagram_story_id, :instagram_story_person_id)

    post_pairs = InstagramPostFace
      .joins(:instagram_profile_post)
      .where(instagram_profile_posts: { instagram_profile_id: profile.id })
      .where.not(instagram_story_person_id: nil)
      .pluck(:instagram_profile_post_id, :instagram_story_person_id)

    shared_counts = Hash.new(0)
    # Each pair is [media_id, person_id]; stories are tallied before posts.
    [ story_pairs, post_pairs ].each do |pairs|
      pairs.group_by(&:first).each_value do |same_media|
        cast = same_media.map(&:last).uniq
        next unless cast.include?(primary_person_id)

        cast.each do |person_id|
          shared_counts[person_id] += 1 unless person_id == primary_person_id
        end
      end
    end

    shared_counts.transform_values do |total|
      {
        co_appearances_with_primary: total.to_i,
        relationship: relationship_for_coappearance(total.to_i)
      }
    end
  end
  # Writes each collaborator's relationship label and co-appearance count
  # into that person's metadata, via update_columns.
  def update_collaborator_relationships!(profile:, collaborator_index:)
    return if collaborator_index.empty?

    collaborators = profile.instagram_story_people.where(id: collaborator_index.keys)
    collaborators.find_each do |member|
      entry = collaborator_index[member.id] || {}
      meta = member.metadata.is_a?(Hash) ? member.metadata.deep_dup : {}
      meta.merge!(
        "relationship" => entry[:relationship],
        "co_appearances_with_primary" => entry[:co_appearances_with_primary].to_i,
        "relationship_updated_at" => Time.current.iso8601
      )
      member.update_columns(metadata: meta, updated_at: Time.current)
    end
  end
  # Resolves usernames to known people of the profile through the alias index.
  # Returns one hash per matched username; nil-valued keys are dropped.
  def map_usernames_to_people(profile:, usernames:)
    return [] if usernames.empty?

    aliases = username_alias_index(profile: profile)
    known_people = profile.instagram_story_people.where(id: aliases.values.uniq).index_by(&:id)

    usernames.each_with_object([]) do |handle, matches|
      owner_id = aliases[handle]
      owner = owner_id && known_people[owner_id]
      next unless owner

      stored_relationship =
        if owner.metadata.is_a?(Hash)
          owner.metadata["relationship"].to_s.presence
        end

      matches << {
        username: handle,
        person_id: owner.id,
        role: owner.role,
        label: owner.label.to_s.presence,
        relationship: stored_relationship
      }.compact
    end
  end
  # Builds `alias => person_id` for every person of the profile, from their
  # linked usernames plus their normalized label. When two people share an
  # alias, the first one encountered keeps it.
  def username_alias_index(profile:)
    index = {}

    profile.instagram_story_people.find_each do |person|
      names = [ *linked_usernames(person), normalize_username(person.label) ].compact.uniq
      names.each { |name| index[name] ||= person.id }
    end

    index
  end
  # Adds profile-wide context to each participant row: the current role,
  # whether they are the owner, appearance counts, and collaborator
  # relationship data.
  #
  # participants       - rows from build_participants (each holds :person).
  # stats              - hash from profile_face_stats (:person_counts is read).
  # collaborator_index - hash from build_collaborator_index.
  #
  # Returns the enriched rows, de-duplicated by person id plus rounded
  # similarity/confidence.
  def enrich_participants(participants:, stats:, collaborator_index:)
    counts = stats[:person_counts]

    enriched = participants.map do |row|
      person = row[:person]
      collaborator = collaborator_index[person.id] || {}
      appearances = counts[person.id].to_i
      role = person.role.to_s
      # Guard like the rest of this class does: metadata that is not a Hash
      # must not raise here, since that would abort the whole resolution.
      stored_meta = person.metadata.is_a?(Hash) ? person.metadata : {}

      row.merge(
        role: role,
        owner_match: role == "primary_user",
        recurring_face: appearances > 1,
        appearances: appearances,
        relationship: collaborator[:relationship] || stored_meta["relationship"],
        co_appearances_with_primary: collaborator[:co_appearances_with_primary].to_i,
        linked_usernames: linked_usernames(person),
        real_person_status: person.real_person_status,
        identity_confidence: person.identity_confidence,
        merged_into_person_id: person.merged_into_person_id
      )
    end

    enriched.uniq do |row|
      [ row[:person_id], row[:match_similarity].to_f.round(4), row[:detector_confidence].to_f.round(4) ]
    end
  end
  # Reloads the person behind each participant row and refreshes the fields
  # derived from it. Rows whose person can no longer be found under the
  # profile are returned unchanged.
  def refresh_participants_with_latest_people(participants:, profile:)
    person_ids = participants.map { |row| row[:person_id] }.reject(&:nil?).uniq
    return participants if person_ids.empty?

    reloaded = profile.instagram_story_people.where(id: person_ids).index_by(&:id)

    participants.map do |row|
      current = reloaded[row[:person_id]]
      if current
        row.merge(
          person: current,
          role: current.role.to_s,
          label: current.label.to_s.presence,
          linked_usernames: linked_usernames(current),
          real_person_status: current.real_person_status,
          identity_confidence: current.identity_confidence,
          merged_into_person_id: current.merged_into_person_id
        )
      else
        row
      end
    end
  end
  # Produces the one-line human-readable summary of a resolution: primary
  # identity, up to 8 participants with their details, referenced usernames
  # and the unknown-face count. Falls back to a fixed sentence when there is
  # nothing to report.
  def build_summary_text(profile:, participants:, primary_identity:, usernames:, unknown_face_count:)
    sentences = []

    if primary_identity[:person_id].present?
      status = primary_identity[:confirmed] ? "confirmed" : "candidate"
      sentences << "Primary identity #{status}: #{primary_identity[:label] || profile.username}"
    end

    if participants.any?
      described = participants.first(8).map do |row|
        name = row[:label] || "person_#{row[:person_id]}"
        notes = []

        relationship = row[:relationship].to_s.presence
        notes << relationship if relationship.present?

        handles = Array(row[:linked_usernames]).first(2)
        notes << "aka #{handles.join('/')}" if handles.any?

        notes << "seen #{row[:appearances]}x" if row[:appearances].to_i.positive?

        notes.empty? ? name : "#{name} (#{notes.join(', ')})"
      end
      sentences << "Participants: #{described.join('; ')}"
    end

    sentences << "Referenced usernames: #{usernames.join(', ')}" if usernames.any?
    sentences << "Unknown faces: #{unknown_face_count}" if unknown_face_count.to_i.positive?

    sentences.join(". ").strip.presence || "No identifiable participants found."
  end
  # Stores the resolved identity data on the profile's behavior record
  # (found or initialized for the profile).
  #
  # Writes three keys into behavioral_summary: "face_identity_profile",
  # "related_individuals" and "known_username_matches" (first 20), and stamps
  # metadata with the update time and version "v1". Raises if save! fails.
  def persist_profile_face_identity!(profile:, primary_identity:, collaborator_index:, username_matches:)
    record = InstagramProfileBehaviorProfile.find_or_initialize_by(instagram_profile: profile)
    summary = record.behavioral_summary.is_a?(Hash) ? record.behavioral_summary.deep_dup : {}
    summary["face_identity_profile"] = primary_identity

    # Load all collaborators in one query rather than one find_by per person.
    people_by_id =
      if collaborator_index.empty?
        {}
      else
        profile.instagram_story_people.where(id: collaborator_index.keys).index_by(&:id)
      end

    summary["related_individuals"] = collaborator_index.map do |person_id, row|
      person = people_by_id[person_id]
      {
        person_id: person_id,
        role: person&.role,
        label: person&.label.to_s.presence,
        relationship: row[:relationship],
        co_appearances_with_primary: row[:co_appearances_with_primary].to_i,
        linked_usernames: person ? linked_usernames(person).first(6) : []
      }.compact
    end
    summary["known_username_matches"] = username_matches.first(20)

    metadata = record.metadata.is_a?(Hash) ? record.metadata.deep_dup : {}
    metadata["face_identity_updated_at"] = Time.current.iso8601
    metadata["face_identity_version"] = "v1"

    record.activity_score = record.activity_score.to_f if record.activity_score.present?
    record.behavioral_summary = summary
    record.metadata = metadata
    record.save!
  end
  # Writes the resolution summary into the source's metadata and, for
  # stories that expose a source_event, into that event's metadata as well.
  # Best effort: any StandardError is swallowed and nil is returned.
  def persist_source_summary!(source:, source_type:, summary:)
    narrative = summary[:participant_summary_text].to_s

    source_meta = source.metadata.is_a?(Hash) ? source.metadata.deep_dup : {}
    source_meta.merge!(
      "face_identity" => summary,
      "participant_summary" => narrative,
      "participants" => Array(summary[:participants]).first(12)
    )
    source.update_columns(metadata: source_meta, updated_at: Time.current)

    return unless source_type == "story" && source.respond_to?(:source_event)

    linked_event = source.source_event
    return unless linked_event

    event_meta = linked_event.metadata.is_a?(Hash) ? linked_event.metadata.deep_dup : {}
    event_meta.merge!("face_identity" => summary, "participant_summary" => narrative)
    linked_event.update_columns(metadata: event_meta, updated_at: Time.current)
  rescue StandardError
    nil
  end
  # Copies each linked person's role onto the face records of the source,
  # touching only faces whose role differs. Best effort: any StandardError is
  # swallowed and nil is returned.
  def sync_source_face_roles!(source:, source_type:)
    face_scope =
      case source_type
      when "post" then source.instagram_post_faces
      when "story" then source.instagram_story_faces
      end
    return unless face_scope

    face_scope.includes(:instagram_story_person).find_each do |face|
      owner = face.instagram_story_person
      next unless owner
      next if face.role.to_s == owner.role.to_s

      face.update_columns(role: owner.role.to_s, updated_at: Time.current)
    end
  rescue StandardError
    nil
  end
  # Returns up to 20 normalized, unique usernames stored under
  # metadata["linked_usernames"]; empty when metadata is not a Hash.
  def linked_usernames(person)
    stored = person.metadata.is_a?(Hash) ? person.metadata["linked_usernames"] : nil
    Array(stored).filter_map { |raw| normalize_username(raw).presence }.uniq.first(20)
  end
  # Maps a co-appearance count to a relationship label, strongest tier first.
  def relationship_for_coappearance(count)
    if count >= VERY_FREQUENT_COLLABORATOR_CO_APPEARANCES
      "very_frequent_collaborator"
    elsif count >= FREQUENT_COLLABORATOR_CO_APPEARANCES
      "frequent_collaborator"
    elsif count.positive?
      "occasional_collaborator"
    else
      "unknown"
    end
  end
  # Returns up to 20 unique lowercase tokens from the profile's display name
  # and bio, skipping tokens shorter than 3 characters and a small stopword
  # list.
  def bio_context_tokens(profile:)
    combined = [ profile.display_name, profile.bio ].join(" ").downcase
    return [] if combined.blank?

    ignored = %w[the and for with this that from your our you are]
    combined
      .scan(/[a-z0-9_]+/)
      .select { |word| word.length >= 3 && !ignored.include?(word) }
      .uniq
      .first(20)
  end
  # Placeholder primary-identity hash for when no primary person can be
  # determined. It has the same keys as the hash promote_primary_identity!
  # returns.
  def empty_primary_identity
    blank_identity = { person_id: nil, confirmed: false, role: "unknown", label: nil }
    blank_identity.merge(
      appearance_count: 0,
      total_faces: 0,
      dominance_ratio: 0.0,
      linked_usernames: [],
      bio_context: []
    )
  end
  # Canonicalizes a raw handle, mention, or Instagram profile URL into a
  # lowercase username.
  #
  # value - anything responding to to_s (nil is accepted).
  #
  # Returns the username, or nil when the input is empty, reserved, or not
  # 2..30 characters long after cleaning.
  def normalize_username(value)
    token = value.to_s.strip.downcase
    return nil if token.empty?

    # Accept profile URLs with or without scheme / "www." and keep only the
    # first path segment, so query strings and fragments ("alice?hl=en") are
    # not fused into the handle by the character filter below.
    url_prefix = token.match(%r{\A(?:https?://)?(?:www\.)?instagram\.com/})
    token =
      if url_prefix
        url_prefix.post_match[%r{\A[^/?#]*}]
      else
        token.split("/").first.to_s
      end

    token = token.delete_prefix("@").delete_prefix("#").delete_suffix(":")
    token = token.gsub(/[^a-z0-9._]/, "")
    return nil if token.empty?
    return nil if RESERVED_USERNAMES.include?(token)
    return nil unless token.length.between?(2, 30)

    token
  end
  # True when a bare token looks like a username: 3..30 allowed characters,
  # not reserved, not an instagram.com host, and containing at least one "_"
  # or ".".
  def username_like_token?(token)
    candidate = token.to_s.downcase
    well_formed = candidate.match?(/\A[a-z0-9._]{3,30}\z/)

    if !well_formed || RESERVED_USERNAMES.include?(candidate) || candidate.include?("instagram.com")
      false
    else
      %w[_ .].any? { |marker| candidate.include?(marker) }
    end
  end
  526. end

app/services/instagram/authentication_required_error.rb

0.0% lines covered

100.0% branches covered

4 relevant lines. 0 lines covered and 4 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module Instagram
    # Error class for the Instagram namespace; adds nothing to StandardError.
    # NOTE(review): presumably raised when a session needs re-authentication — confirm at the raise sites.
    class AuthenticationRequiredError < StandardError; end
  end

app/services/instagram/avatar_url_normalizer.rb

0.0% lines covered

100.0% branches covered

37 relevant lines. 0 lines covered and 37 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "cgi"
  2. require "uri"
  module Instagram
    # Turns raw avatar URLs into absolute http(s) URLs, rejecting relative
    # paths, unparseable values and known placeholder images.
    class AvatarUrlNormalizer
      PLACEHOLDER_PATTERNS = [
        /\/static\/images\/profile\//i,
        /profile-pic-null/i,
        /default[_-]?profile/i
      ].freeze

      # Returns the normalized absolute URL string, or nil when raw_url is
      # blank, a site-relative path, not http(s), host-less, unparseable, or a
      # placeholder avatar. HTML entities are unescaped first;
      # protocol-relative and scheme-less host URLs are upgraded to https.
      def self.normalize(raw_url)
        text = CGI.unescapeHTML(raw_url.to_s).strip
        return nil if text.blank?

        absolute =
          if text.start_with?("//")
            "https:#{text}"
          elsif text.start_with?("/")
            nil
          elsif text.match?(%r{\Ahttps?://}i)
            text
          elsif text.match?(%r{\A[a-z0-9.-]+\.[a-z]{2,}([/:]|$)}i)
            "https://#{text}"
          end
        return nil unless absolute

        parsed = URI.parse(absolute)
        # URI::HTTPS inherits from URI::HTTP, so this accepts both schemes.
        return nil unless parsed.is_a?(URI::HTTP)
        return nil if parsed.host.to_s.blank? || placeholder_path?(parsed.path.to_s)

        parsed.to_s
      rescue URI::InvalidURIError, ArgumentError
        nil
      end

      # True when the path matches any of PLACEHOLDER_PATTERNS.
      def self.placeholder_path?(path)
        lowered = path.to_s.downcase
        !lowered.blank? && PLACEHOLDER_PATTERNS.any? { |pattern| lowered.match?(pattern) }
      end
    end
  end

app/services/instagram/client.rb

0.0% lines covered

100.0% branches covered

6278 relevant lines. 0 lines covered and 6278 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "selenium-webdriver"
  2. require "fileutils"
  3. require "time"
  4. require "net/http"
  5. require "json"
  6. require "cgi"
  7. require "base64"
  8. require "digest"
  9. require "stringio"
  10. require "set"
  11. module Instagram
  12. class Client
  13. INSTAGRAM_BASE_URL = "https://www.instagram.com".freeze
  14. DEBUG_CAPTURE_DIR = Rails.root.join("log", "instagram_debug").freeze
  15. STORY_INTERACTION_RETRY_DAYS = 3
  16. PROFILE_FEED_PAGE_SIZE = 30
  17. PROFILE_FEED_MAX_PAGES = 120
  18. PROFILE_FEED_BROWSER_ITEM_CAP = 500
  # Builds a client bound to a single account. The account is kept in
  # @account; the visible methods read `username` and `instagram_posts` from
  # it and set `login_state` before calling `save!`.
  # NOTE(review): presumably an InstagramAccount record — confirm against callers.
  19. def initialize(account:)
  20. @account = account
  21. end
  # Opens the login page in a non-headless browser, waits (up to
  # timeout_seconds) for the user to log in by hand, persists the session
  # bundle, and marks the account as "authenticated".
  def manual_login!(timeout_seconds: 180)
    login_url = "#{INSTAGRAM_BASE_URL}/accounts/login/"

    with_driver(headless: false) do |browser|
      browser.navigate.to(login_url)
      wait_for_manual_login!(driver: browser, timeout_seconds: timeout_seconds)
      persist_session_bundle!(browser)
      @account.update!(login_state: "authenticated")
    end
  end
  # Delegates session validation to SessionValidationService, handing it this
  # client's driver helpers and the Rails logger when Rails is loaded.
  def validate_session!
    rails_logger = defined?(Rails) ? Rails.logger : nil

    validator = SessionValidationService.new(
      account: @account,
      with_driver: method(:with_driver),
      wait_for: method(:wait_for),
      logger: rails_logger
    )
    validator.call
  end
  # Primary sync: followers/following lists (plus inbox to mark
  # known-messageable threads).
  #
  # Delegates to SyncFollowGraphService, passing this client's helpers as
  # callables. Returns a stats hash suitable for storing in SyncRun.
  def sync_follow_graph!
    callbacks = {
      with_recoverable_session: method(:with_recoverable_session),
      with_authenticated_driver: method(:with_authenticated_driver),
      collect_conversation_users: method(:collect_conversation_users),
      collect_story_users: method(:collect_story_users),
      collect_follow_list: method(:collect_follow_list),
      upsert_follow_list: method(:upsert_follow_list!)
    }

    SyncFollowGraphService.new(account: @account, **callbacks).call
  end
  # Captures "home feed" post identifiers that appear while scrolling.
  #
  # This does NOT auto-like or auto-comment. It only records posts, downloads
  # media (temporarily), and queues analysis. Interaction should remain a
  # user-confirmed action in the UI.
  #
  # rounds        - scroll passes, clamped to 1..25.
  # delay_seconds - pause between passes, clamped to 10..120.
  # max_new       - stop after this many newly created posts, clamped to 1..200.
  #
  # Returns `{ seen_posts: Integer, new_posts: Integer }`.
  def capture_home_feed_posts!(rounds: 4, delay_seconds: 45, max_new: 20)
    round_count = rounds.to_i.clamp(1, 25)
    pause_seconds = delay_seconds.to_i.clamp(10, 120)
    new_post_cap = max_new.to_i.clamp(1, 200)

    with_recoverable_session(label: "feed_capture") do
      with_authenticated_driver do |driver|
        with_task_capture(driver: driver, task_name: "feed_capture_home", meta: { rounds: rounds, delay_seconds: delay_seconds, max_new: max_new }) do
          driver.navigate.to(INSTAGRAM_BASE_URL)
          wait_for(driver, css: "body", timeout: 12)
          dismiss_common_overlays!(driver)

          seen = 0
          new_posts = 0

          round_count.times do |round_index|
            dismiss_common_overlays!(driver)
            items = extract_feed_items_from_dom(driver)
            now = Time.current

            items.each do |item|
              shortcode = item[:shortcode].to_s.strip
              next if shortcode.blank?

              seen += 1
              post = @account.instagram_posts.find_or_initialize_by(shortcode: shortcode)
              is_new = post.new_record?
              post.detected_at ||= now
              post.post_kind = item[:post_kind].presence || post.post_kind.presence || "unknown"
              post.author_username = item[:author_username].presence || post.author_username
              post.media_url = item[:media_url].presence || post.media_url
              post.caption = item[:caption].presence || post.caption
              post.metadata = (post.metadata || {}).merge(item[:metadata] || {}).merge(round: round_index + 1)
              post.save! if post.changed?

              if is_new
                new_posts += 1
                # Download media and analyze (best effort).
                DownloadInstagramPostMediaJob.perform_later(instagram_post_id: post.id) if post.media_url.present?
                AnalyzeInstagramPostJob.perform_later(instagram_post_id: post.id)
              end

              break if new_posts >= new_post_cap
            end

            break if new_posts >= new_post_cap
            # No further pass follows the last round, so scrolling and waiting
            # would only hold the browser session open for nothing.
            break if round_index == round_count - 1

            # Scroll down a bit.
            driver.execute_script("window.scrollBy(0, Math.max(700, window.innerHeight * 0.85));")
            sleep(pause_seconds)
          end

          { seen_posts: seen, new_posts: new_posts }
        end
      end
    end
  end
  # Full Selenium automation flow:
  # - navigate to the home feed
  # - optionally engage one story first (hold/freeze until reply)
  # - find image posts and hand each one to auto_engage_feed_post!
  # - capture HTML/JSON/screenshot artifacts at each step
  #
  # max_posts is clamped to 1..10 and story_hold_seconds to 8..40. A failure
  # on one post is recorded in :details and does not stop the run.
  #
  # Returns `{ story_replied:, posts_commented:, posts_processed:, details: }`.
  def auto_engage_home_feed!(max_posts: 3, include_story: true, story_hold_seconds: 18)
    post_cap = max_posts.to_i.clamp(1, 10)
    engage_story = ActiveModel::Type::Boolean.new.cast(include_story)
    hold_for = story_hold_seconds.to_i.clamp(8, 40)
    start_meta = { max_posts: post_cap, include_story: engage_story, story_hold_seconds: hold_for }

    with_recoverable_session(label: "auto_engage_home_feed") do
      with_authenticated_driver do |driver|
        with_task_capture(driver: driver, task_name: "auto_engage_home_feed_start", meta: start_meta) do
          driver.navigate.to(INSTAGRAM_BASE_URL)
          wait_for(driver, css: "body", timeout: 12)
          dismiss_common_overlays!(driver)
          capture_task_html(driver: driver, task_name: "auto_engage_home_loaded", status: "ok")

          story_result = { attempted: false, replied: false }
          story_result = auto_engage_first_story!(driver: driver, story_hold_seconds: hold_for) if engage_story

          # Return to the feed; the story step may have navigated away.
          driver.navigate.to(INSTAGRAM_BASE_URL)
          wait_for(driver, css: "body", timeout: 12)
          dismiss_common_overlays!(driver)
          sleep(0.6)
          capture_task_html(driver: driver, task_name: "auto_engage_home_before_posts", status: "ok")

          candidates = extract_feed_items_from_dom(driver).select do |entry|
            next false unless entry[:post_kind] == "post"
            next false unless entry[:shortcode].to_s.present?

            entry[:media_url].to_s.start_with?("http://", "https://")
          end

          capture_task_html(
            driver: driver,
            task_name: "auto_engage_posts_discovered",
            status: "ok",
            meta: { discovered_posts: candidates.length, max_posts: post_cap }
          )

          processed = 0
          commented = 0
          details = []

          candidates.first(post_cap).each do |entry|
            processed += 1
            begin
              outcome = auto_engage_feed_post!(driver: driver, item: entry)
              details << outcome
              commented += 1 if outcome[:comment_posted] == true
            rescue StandardError => error
              details << {
                shortcode: entry[:shortcode],
                username: entry[:author_username],
                comment_posted: false,
                error: error.message.to_s
              }
            end
          end

          {
            story_replied: story_result[:replied] == true,
            posts_commented: commented,
            posts_processed: processed,
            details: details
          }
        end
      end
    end
  end
  # Homepage carousel-based story sync.
  #
  # Flow:
  # - open the home page and the first story of the story tray
  # - walk up to `story_limit` stories using the carousel "Next" control
  # - for image stories: download, store, analyze, (optionally) post a generated comment
  # - for video stories: download + persist to ActiveStorage and ingest for later processing
  # - capture per-step HTML/JSON/screenshot artifacts for DOM troubleshooting
  #
  # Every story that cannot be processed records a skip/failure event and the
  # loop advances with "Next"; a failed "Next" click always ends the run with
  # the exit reason "next_navigation_failed".
  #
  # @param story_limit [#to_i] maximum number of stories to visit; clamped to 1..50
  # @param auto_reply_only [Object] cast to a boolean; when true, comments are only
  #   posted for profiles accepted by `profile_auto_reply_enabled?`
  # @return [Hash{Symbol=>Integer}] the run counters (`stats`), as the value of the
  #   innermost block — NOTE(review): assumes the session/driver/capture wrappers
  #   return their block's value; confirm against their definitions.
  def sync_home_story_carousel!(story_limit: 10, auto_reply_only: false)
    limit = story_limit.to_i.clamp(1, 50)
    tagged_only = ActiveModel::Type::Boolean.new.cast(auto_reply_only)

    with_recoverable_session(label: "sync_home_story_carousel") do
      with_authenticated_driver do |driver|
        with_task_capture(
          driver: driver,
          task_name: "home_story_sync_start",
          meta: { story_limit: limit, auto_reply_only: tagged_only }
        ) do
          driver.navigate.to(INSTAGRAM_BASE_URL)
          wait_for(driver, css: "body", timeout: 12)
          dismiss_common_overlays!(driver)
          capture_task_html(driver: driver, task_name: "home_story_sync_home_loaded", status: "ok")

          open_first_story_from_home_carousel!(driver: driver)
          wait_for(driver, css: "body", timeout: 12)
          freeze_story_progress!(driver)
          capture_task_html(driver: driver, task_name: "home_story_sync_opened_first_story", status: "ok")

          stats = {
            stories_visited: 0,
            downloaded: 0,
            analyzed: 0,
            commented: 0,
            reacted: 0,
            skipped_video: 0,
            skipped_not_tagged: 0,
            skipped_ads: 0,
            skipped_invalid_media: 0,
            skipped_unreplyable: 0,
            skipped_out_of_network: 0,
            skipped_interaction_retry: 0,
            skipped_reshared_external_link: 0,
            failed: 0
          }
          visited_refs = {}
          story_api_cache = {}
          # Duplicate stories `next` without counting as visited, so the loop is
          # bounded by a hard iteration cap instead of by `limit` alone.
          safety_limit = limit * 5
          exit_reason = "safety_limit_exhausted"

          account_profile = find_or_create_profile_for_auto_engagement!(username: @account.username)
          started_at = Time.current
          account_profile.record_event!(
            kind: "story_sync_started",
            external_id: "story_sync_started:home_carousel:#{started_at.utc.iso8601(6)}",
            occurred_at: started_at,
            metadata: { source: "home_story_carousel", story_limit: limit, auto_reply_only: tagged_only }
          )

          safety_limit.times do
            if stats[:stories_visited] >= limit
              exit_reason = "limit_reached"
              break
            end

            # Resolve which story is on screen; retry once after URL recovery if requested.
            context = normalized_story_context_for_processing(driver: driver, context: current_story_context(driver))
            if context[:story_url_recovery_needed]
              recover_story_url_context!(driver: driver, username: context[:username], reason: "fallback_profile_url")
              context = normalized_story_context_for_processing(driver: driver, context: current_story_context(driver))
            end

            ref = context[:ref].presence || context[:story_key].to_s
            if ref.blank?
              # Without any reference we cannot identify the story: record and stop.
              capture_task_html(
                driver: driver,
                task_name: "home_story_sync_story_context_missing",
                status: "error",
                meta: {
                  current_url: driver.current_url.to_s,
                  page_title: driver.title.to_s,
                  resolved_username: context[:username],
                  resolved_story_id: context[:story_id]
                }
              )
              fallback_username = context[:username].presence || @account.username.to_s
              if fallback_username.present?
                fallback_profile = find_or_create_profile_for_auto_engagement!(username: fallback_username)
                fallback_profile.record_event!(
                  kind: "story_sync_failed",
                  external_id: "story_sync_failed:context_missing:#{Time.current.utc.iso8601(6)}",
                  occurred_at: Time.current,
                  metadata: {
                    source: "home_story_carousel",
                    reason: "story_context_missing",
                    current_url: driver.current_url.to_s,
                    page_title: driver.title.to_s
                  }
                )
              end
              exit_reason = "story_context_missing"
              break
            end

            story_key = context[:story_key].presence || ref
            if visited_refs[story_key]
              # Already handled this story in this run: try to move on, otherwise stop.
              capture_task_html(
                driver: driver,
                task_name: "home_story_sync_duplicate_story_key",
                status: "error",
                meta: {
                  story_key: story_key,
                  ref: ref,
                  current_url: driver.current_url.to_s
                }
              )
              moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
              if moved
                next
              end
              exit_reason = "duplicate_story_key_no_progress"
              break
            end
            visited_refs[story_key] = true

            # Story id resolution order: context, then the ref, then the current URL.
            story_id = normalize_story_id_token(context[:story_id])
            story_id = normalize_story_id_token(ref.to_s.split(":")[1].to_s) if story_id.blank?
            story_id = normalize_story_id_token(current_story_reference(driver.current_url.to_s).to_s.split(":")[1].to_s) if story_id.blank?
            story_url = canonical_story_url(
              username: context[:username],
              story_id: story_id,
              fallback_url: driver.current_url.to_s
            )

            stats[:stories_visited] += 1
            freeze_story_progress!(driver)
            capture_task_html(
              driver: driver,
              task_name: "home_story_sync_story_loaded",
              status: "ok",
              meta: { ref: ref, story_key: story_key, username: context[:username], story_id: story_id, current_url: story_url }
            )

            if story_id.blank?
              stats[:failed] += 1
              fallback_profile = find_or_create_profile_for_auto_engagement!(username: context[:username].presence || @account.username.to_s)
              fallback_profile.record_event!(
                kind: "story_sync_failed",
                external_id: "story_sync_failed:missing_story_id:#{Time.current.utc.iso8601(6)}",
                occurred_at: Time.current,
                metadata: {
                  source: "home_story_carousel",
                  reason: "story_id_unresolved",
                  story_ref: ref,
                  story_key: story_key,
                  story_url: story_url
                }
              )
              moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
              unless moved
                exit_reason = "next_navigation_failed"
                break
              end
              next
            end

            profile = find_story_network_profile(username: context[:username])
            if profile.nil?
              # No network profile for this username: the skip is logged on the account's own profile.
              stats[:skipped_out_of_network] += 1
              account_profile.record_event!(
                kind: "story_reply_skipped",
                external_id: "story_reply_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
                occurred_at: Time.current,
                metadata: {
                  source: "home_story_carousel",
                  story_id: story_id,
                  story_ref: ref,
                  story_url: story_url,
                  reason: "profile_not_in_network",
                  status: "Out of network",
                  username: context[:username].to_s
                }
              )
              capture_task_html(
                driver: driver,
                task_name: "home_story_sync_out_of_network_skipped",
                status: "ok",
                meta: {
                  story_id: story_id,
                  story_ref: ref,
                  username: context[:username].to_s,
                  reason: "profile_not_in_network"
                }
              )
              moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
              unless moved
                exit_reason = "next_navigation_failed"
                break
              end
              next
            end

            if profile_interaction_retry_pending?(profile)
              # Counted under both the retry-specific and the general unreplyable counters.
              stats[:skipped_interaction_retry] += 1
              stats[:skipped_unreplyable] += 1
              profile.record_event!(
                kind: "story_reply_skipped",
                external_id: "story_reply_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
                occurred_at: Time.current,
                metadata: {
                  source: "home_story_carousel",
                  story_id: story_id,
                  story_ref: ref,
                  story_url: story_url,
                  reason: "interaction_retry_window_active",
                  status: "Interaction unavailable (retry pending)",
                  retry_after_at: profile.story_interaction_retry_after_at&.iso8601,
                  interaction_state: profile.story_interaction_state.to_s,
                  interaction_reason: profile.story_interaction_reason.to_s
                }
              )
              capture_task_html(
                driver: driver,
                task_name: "home_story_sync_interaction_retry_skipped",
                status: "ok",
                meta: {
                  story_id: story_id,
                  story_ref: ref,
                  retry_after_at: profile.story_interaction_retry_after_at&.iso8601,
                  interaction_state: profile.story_interaction_state.to_s,
                  interaction_reason: profile.story_interaction_reason.to_s
                }
              )
              moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
              unless moved
                exit_reason = "next_navigation_failed"
                break
              end
              next
            end

            media = resolve_story_media_for_current_context(
              driver: driver,
              username: context[:username],
              story_id: story_id,
              fallback_story_key: story_key,
              cache: story_api_cache
            )
            if media[:url].to_s.blank?
              stats[:failed] += 1
              profile.record_event!(
                kind: "story_sync_failed",
                external_id: "story_sync_failed:#{story_id}:#{Time.current.utc.iso8601(6)}",
                occurred_at: Time.current,
                metadata: {
                  source: "home_story_carousel",
                  reason: "api_story_media_unavailable",
                  story_id: story_id,
                  story_ref: ref,
                  story_url: story_url,
                  media_source: media[:source].to_s,
                  media_variant_count: media[:media_variant_count].to_i
                }
              )
              moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
              unless moved
                exit_reason = "next_navigation_failed"
                break
              end
              next
            end

            # Guard against media that belongs to a different story than the one on screen.
            media_story_id_hint = story_id_hint_from_media_url(media[:url])
            if media_story_id_hint.present? && media_story_id_hint != story_id
              stats[:failed] += 1
              profile.record_event!(
                kind: "story_sync_failed",
                external_id: "story_sync_failed:#{story_id}:#{Time.current.utc.iso8601(6)}",
                occurred_at: Time.current,
                metadata: {
                  source: "home_story_carousel",
                  reason: "story_media_story_id_mismatch",
                  expected_story_id: story_id,
                  media_story_id: media_story_id_hint,
                  story_ref: ref,
                  story_url: story_url,
                  media_source: media[:source].to_s,
                  media_url: media[:url].to_s
                }
              )
              moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
              unless moved
                exit_reason = "next_navigation_failed"
                break
              end
              next
            end

            ad_context = detect_story_ad_context(driver: driver, media: media)
            capture_task_html(
              driver: driver,
              task_name: "home_story_sync_story_probe",
              status: "ok",
              meta: {
                story_id: story_id,
                story_ref: ref,
                story_key: story_key,
                username: context[:username],
                ad_detected: ad_context[:ad_detected],
                ad_reason: ad_context[:reason],
                ad_marker_text: ad_context[:marker_text],
                ad_signal_source: ad_context[:signal_source],
                ad_signal_confidence: ad_context[:signal_confidence],
                ad_debug_hint: ad_context[:debug_hint],
                media_source: media[:source],
                media_type: media[:media_type],
                media_url: media[:url].to_s.byteslice(0, 500),
                media_width: media[:width],
                media_height: media[:height],
                media_variant_count: media[:media_variant_count].to_i,
                primary_media_source: media[:primary_media_source].to_s,
                primary_media_index: media[:primary_media_index],
                carousel_media_count: Array(media[:carousel_media]).length
              }
            )

            if ad_context[:ad_detected]
              stats[:skipped_ads] += 1
              profile.record_event!(
                kind: "story_ad_skipped",
                external_id: "story_ad_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
                occurred_at: Time.current,
                metadata: {
                  source: "home_story_carousel",
                  story_id: story_id,
                  story_ref: ref,
                  story_url: story_url,
                  reason: ad_context[:reason],
                  marker_text: ad_context[:marker_text]
                }
              )
              capture_task_html(
                driver: driver,
                task_name: "home_story_sync_ad_skipped",
                status: "ok",
                meta: {
                  story_id: story_id,
                  story_ref: ref,
                  reason: ad_context[:reason],
                  marker_text: ad_context[:marker_text]
                }
              )
              moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
              unless moved
                exit_reason = "next_navigation_failed"
                break
              end
              next
            end

            api_external_context = story_external_profile_link_context_from_api(
              username: context[:username],
              story_id: story_id,
              cache: story_api_cache
            )
            if api_external_context[:known] && api_external_context[:has_external_profile_link]
              stats[:skipped_reshared_external_link] += 1
              profile.record_event!(
                kind: "story_reply_skipped",
                external_id: "story_reply_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
                occurred_at: Time.current,
                metadata: {
                  source: "home_story_carousel",
                  story_id: story_id,
                  story_ref: ref,
                  story_url: story_url,
                  reason: api_external_context[:reason_code].to_s.presence || "api_external_profile_indicator",
                  status: "External attribution detected (API)",
                  linked_username: api_external_context[:linked_username],
                  linked_profile_url: api_external_context[:linked_profile_url],
                  marker_text: api_external_context[:marker_text],
                  linked_targets: Array(api_external_context[:linked_targets])
                }
              )
              capture_task_html(
                driver: driver,
                task_name: "home_story_sync_external_profile_link_skipped",
                status: "ok",
                meta: {
                  story_id: story_id,
                  story_ref: ref,
                  linked_username: api_external_context[:linked_username],
                  linked_profile_url: api_external_context[:linked_profile_url],
                  marker_text: api_external_context[:marker_text],
                  linked_targets: Array(api_external_context[:linked_targets]),
                  reason_code: api_external_context[:reason_code]
                }
              )
              moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
              unless moved
                exit_reason = "next_navigation_failed"
                break
              end
              next
            end

            # API says replying is impossible: mark the profile and schedule a retry window.
            api_reply_gate = story_reply_capability_from_api(username: context[:username], story_id: story_id)
            if api_reply_gate[:known] && api_reply_gate[:reply_possible] == false
              stats[:skipped_unreplyable] += 1
              retry_after = Time.current + STORY_INTERACTION_RETRY_DAYS.days
              mark_profile_interaction_state!(
                profile: profile,
                state: "unavailable",
                reason: api_reply_gate[:reason_code].to_s.presence || "api_can_reply_false",
                reaction_available: false,
                retry_after_at: retry_after
              )
              profile.record_event!(
                kind: "story_reply_skipped",
                external_id: "story_reply_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
                occurred_at: Time.current,
                metadata: {
                  source: "home_story_carousel",
                  story_id: story_id,
                  story_ref: ref,
                  story_url: story_url,
                  reason: api_reply_gate[:reason_code],
                  status: api_reply_gate[:status],
                  retry_after_at: retry_after.iso8601
                }
              )
              moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
              unless moved
                exit_reason = "next_navigation_failed"
                break
              end
              next
            end

            # Trust a positive API answer; otherwise fall back to the driver-based check.
            reply_gate =
              if api_reply_gate[:known] && api_reply_gate[:reply_possible] == true
                { reply_possible: true, reason_code: nil, status: api_reply_gate[:status], marker_text: "", submission_reason: "api_can_reply_true" }
              else
                check_story_reply_capability(driver: driver)
              end

            unless reply_gate[:reply_possible]
              # Reply is unavailable: try a reaction as a fallback before giving up on this story.
              reaction_result = react_to_story_if_available!(driver: driver)
              if reaction_result[:reacted]
                stats[:reacted] += 1
                mark_profile_interaction_state!(
                  profile: profile,
                  state: "reaction_only",
                  reason: reply_gate[:reason_code].to_s.presence || "reply_unavailable_reaction_available",
                  reaction_available: true
                )
                profile.record_event!(
                  kind: "story_reaction_sent",
                  external_id: "story_reaction_sent:#{story_id}:#{Time.current.utc.iso8601(6)}",
                  occurred_at: Time.current,
                  metadata: {
                    source: "home_story_carousel",
                    story_id: story_id,
                    story_ref: ref,
                    story_url: story_url,
                    reaction_reason: reaction_result[:reason],
                    reaction_marker_text: reaction_result[:marker_text],
                    reply_gate_reason: reply_gate[:reason_code]
                  }
                )
                capture_task_html(
                  driver: driver,
                  task_name: "home_story_sync_reaction_fallback_sent",
                  status: "ok",
                  meta: {
                    story_id: story_id,
                    story_ref: ref,
                    reaction_reason: reaction_result[:reason],
                    reaction_marker_text: reaction_result[:marker_text],
                    reply_gate_reason: reply_gate[:reason_code]
                  }
                )
                moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
                unless moved
                  exit_reason = "next_navigation_failed"
                  break
                end
                next
              end

              stats[:skipped_unreplyable] += 1
              retry_after = Time.current + STORY_INTERACTION_RETRY_DAYS.days
              mark_profile_interaction_state!(
                profile: profile,
                state: "unavailable",
                reason: reply_gate[:reason_code].to_s.presence || "reply_unavailable",
                reaction_available: false,
                retry_after_at: retry_after
              )
              profile.record_event!(
                kind: "story_reply_skipped",
                external_id: "story_reply_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
                occurred_at: Time.current,
                metadata: {
                  source: "home_story_carousel",
                  story_id: story_id,
                  story_ref: ref,
                  story_url: story_url,
                  reason: reply_gate[:reason_code],
                  status: reply_gate[:status],
                  submission_reason: reply_gate[:submission_reason],
                  submission_marker_text: reply_gate[:marker_text],
                  retry_after_at: retry_after.iso8601,
                  reaction_fallback_attempted: true,
                  reaction_fallback_reason: reaction_result[:reason],
                  reaction_fallback_marker_text: reaction_result[:marker_text]
                }
              )
              capture_task_html(
                driver: driver,
                task_name: "home_story_sync_reply_precheck_skipped",
                status: "ok",
                meta: {
                  story_id: story_id,
                  story_ref: ref,
                  reason: reply_gate[:reason_code],
                  status_text: reply_gate[:status],
                  marker_text: reply_gate[:marker_text],
                  retry_after_at: retry_after.iso8601,
                  reaction_fallback_reason: reaction_result[:reason],
                  reaction_fallback_marker_text: reaction_result[:marker_text]
                }
              )
              moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
              unless moved
                exit_reason = "next_navigation_failed"
                break
              end
              next
            end

            mark_profile_interaction_state!(
              profile: profile,
              state: "reply_available",
              reason: "reply_box_found",
              reaction_available: nil,
              retry_after_at: nil
            )

            story_time = Time.current
            # The upload event uses a timestamp-free external_id and no occurred_at,
            # unlike the per-view event recorded right after it.
            profile.record_event!(
              kind: "story_uploaded",
              external_id: "story_uploaded:#{story_id}",
              occurred_at: nil,
              metadata: {
                source: "home_story_carousel",
                story_id: story_id,
                story_ref: ref,
                story_url: story_url
              }
            )
            profile.record_event!(
              kind: "story_viewed",
              external_id: "story_viewed:#{story_id}:#{story_time.utc.iso8601(6)}",
              occurred_at: story_time,
              metadata: {
                source: "home_story_carousel",
                story_id: story_id,
                story_ref: ref,
                story_url: story_url
              }
            )

            if media[:media_type].to_s == "video"
              # Videos are archived and handed to the ingestion service; no comment is generated here.
              begin
                download = download_media_with_metadata(url: media[:url], user_agent: @account.user_agent)
                stats[:downloaded] += 1
                now = Time.current
                downloaded_event = profile.record_event!(
                  kind: "story_downloaded",
                  external_id: "story_downloaded:#{story_id}:#{now.utc.iso8601(6)}",
                  occurred_at: now,
                  metadata: {
                    source: "home_story_carousel",
                    story_id: story_id,
                    story_ref: ref,
                    story_url: story_url,
                    media_type: "video",
                    media_source: media[:source],
                    media_url: media[:url],
                    image_url: media[:image_url],
                    video_url: media[:video_url],
                    media_width: media[:width],
                    media_height: media[:height],
                    owner_user_id: media[:owner_user_id],
                    owner_username: media[:owner_username],
                    api_media_variant_count: media[:media_variant_count].to_i,
                    api_primary_media_source: media[:primary_media_source].to_s,
                    api_primary_media_index: media[:primary_media_index],
                    api_carousel_media: compact_story_media_variants_for_metadata(media[:carousel_media]),
                    media_content_type: download[:content_type],
                    media_bytes: download[:bytes].bytesize
                  }
                )
                downloaded_event.media.attach(io: StringIO.new(download[:bytes]), filename: download[:filename], content_type: download[:content_type])
                InstagramProfileEvent.broadcast_story_archive_refresh!(account: @account)
                StoryIngestionService.new(account: @account, profile: profile).ingest!(
                  story: {
                    story_id: story_id,
                    media_type: "video",
                    media_url: media[:url],
                    image_url: nil,
                    video_url: media[:url],
                    caption: nil,
                    permalink: story_url,
                    taken_at: story_time
                  },
                  source_event: downloaded_event,
                  bytes: download[:bytes],
                  content_type: download[:content_type],
                  filename: download[:filename]
                )
              rescue StandardError => e
                stats[:failed] += 1
                profile.record_event!(
                  kind: "story_sync_failed",
                  external_id: "story_sync_failed:#{story_id}:#{Time.current.utc.iso8601(6)}",
                  occurred_at: Time.current,
                  metadata: { source: "home_story_carousel", story_id: story_id, story_ref: ref, story_url: story_url, error_class: e.class.name, error_message: e.message }
                )
              end
              # Incremented for every video story, whether or not the download/ingest succeeded.
              stats[:skipped_video] += 1
              # A failed "Next" click must end the run like in every other branch;
              # silently continuing would revisit this story as a bogus duplicate.
              moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
              unless moved
                exit_reason = "next_navigation_failed"
                break
              end
              next
            end

            duplicate_reply = story_already_replied?(
              profile: profile,
              story_id: story_id,
              story_ref: ref,
              story_url: story_url,
              media_url: media[:url]
            )
            if duplicate_reply[:found]
              profile.record_event!(
                kind: "story_reply_skipped",
                external_id: "story_reply_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
                occurred_at: Time.current,
                metadata: {
                  source: "home_story_carousel",
                  story_id: story_id,
                  story_ref: ref,
                  story_url: story_url,
                  reason: "duplicate_story_already_replied",
                  matched_by: duplicate_reply[:matched_by],
                  matched_event_external_id: duplicate_reply[:matched_external_id]
                }
              )
              capture_task_html(
                driver: driver,
                task_name: "home_story_sync_duplicate_reply_skipped",
                status: "ok",
                meta: {
                  story_id: story_id,
                  story_ref: ref,
                  matched_by: duplicate_reply[:matched_by],
                  matched_event_external_id: duplicate_reply[:matched_external_id]
                }
              )
              moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
              unless moved
                exit_reason = "next_navigation_failed"
                break
              end
              next
            end

            # Image path: download, quality-check, archive, analyze, then comment.
            # Any StandardError is recorded as a failure and the loop still advances.
            begin
              download = download_media_with_metadata(url: media[:url], user_agent: @account.user_agent)
              stats[:downloaded] += 1

              quality = evaluate_story_image_quality(download: download, media: media)
              if quality[:skip]
                stats[:skipped_invalid_media] += 1
                profile.record_event!(
                  kind: "story_reply_skipped",
                  external_id: "story_reply_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
                  occurred_at: Time.current,
                  metadata: {
                    source: "home_story_carousel",
                    story_id: story_id,
                    story_ref: ref,
                    story_url: story_url,
                    reason: "invalid_story_media",
                    quality_reason: quality[:reason],
                    quality_entropy: quality[:entropy],
                    media_type: media[:media_type],
                    media_width: media[:width],
                    media_height: media[:height],
                    media_content_type: download[:content_type],
                    media_bytes: download[:bytes].bytesize
                  }
                )
                capture_task_html(
                  driver: driver,
                  task_name: "home_story_sync_invalid_media_skipped",
                  status: "ok",
                  meta: {
                    story_id: story_id,
                    story_ref: ref,
                    quality_reason: quality[:reason],
                    quality_entropy: quality[:entropy],
                    media_content_type: download[:content_type],
                    media_bytes: download[:bytes].bytesize
                  }
                )
                moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
                unless moved
                  exit_reason = "next_navigation_failed"
                  break
                end
                next
              end

              now = Time.current
              downloaded_event = profile.record_event!(
                kind: "story_downloaded",
                external_id: "story_downloaded:#{story_id}:#{now.utc.iso8601(6)}",
                occurred_at: now,
                metadata: {
                  source: "home_story_carousel",
                  story_id: story_id,
                  story_ref: ref,
                  story_url: story_url,
                  media_type: "image",
                  media_source: media[:source],
                  media_url: media[:url],
                  image_url: media[:image_url],
                  video_url: media[:video_url],
                  media_width: media[:width],
                  media_height: media[:height],
                  owner_user_id: media[:owner_user_id],
                  owner_username: media[:owner_username],
                  api_media_variant_count: media[:media_variant_count].to_i,
                  api_primary_media_source: media[:primary_media_source].to_s,
                  api_primary_media_index: media[:primary_media_index],
                  api_carousel_media: compact_story_media_variants_for_metadata(media[:carousel_media]),
                  media_content_type: download[:content_type],
                  media_bytes: download[:bytes].bytesize
                }
              )
              downloaded_event.media.attach(io: StringIO.new(download[:bytes]), filename: download[:filename], content_type: download[:content_type])
              InstagramProfileEvent.broadcast_story_archive_refresh!(account: @account)

              payload = build_auto_engagement_post_payload(
                profile: profile,
                shortcode: story_id,
                caption: nil,
                permalink: story_url,
                include_story_history: true
              )
              analysis = analyze_for_auto_engagement!(
                analyzable: downloaded_event,
                payload: payload,
                bytes: download[:bytes],
                content_type: download[:content_type],
                source_url: media[:url]
              )
              stats[:analyzed] += 1 if analysis.present?

              suggestions = generate_comment_suggestions_from_analysis!(profile: profile, payload: payload, analysis: analysis)
              comment_text = suggestions.first.to_s.strip
              capture_task_html(
                driver: driver,
                task_name: "home_story_sync_comment_generation",
                status: comment_text.present? ? "ok" : "error",
                meta: { story_ref: ref, suggestions_count: suggestions.length, comment_preview: comment_text.byteslice(0, 120) }
              )

              if tagged_only && !profile_auto_reply_enabled?(profile)
                stats[:skipped_not_tagged] += 1
                profile.record_event!(
                  kind: "story_reply_skipped",
                  external_id: "story_reply_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
                  occurred_at: Time.current,
                  metadata: { source: "home_story_carousel", story_id: story_id, story_ref: ref, story_url: story_url, reason: "missing_auto_reply_tag" }
                )
              elsif comment_text.blank?
                profile.record_event!(
                  kind: "story_reply_skipped",
                  external_id: "story_reply_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
                  occurred_at: Time.current,
                  metadata: { source: "home_story_carousel", story_id: story_id, story_ref: ref, story_url: story_url, reason: "no_comment_generated" }
                )
              else
                # API submission first; the UI path is only tried when the API did not post.
                comment_result = comment_on_story_via_api!(
                  story_id: story_id,
                  story_username: context[:username],
                  comment_text: comment_text
                )
                if !comment_result[:posted]
                  comment_result = comment_on_story_via_ui!(driver: driver, comment_text: comment_text)
                end
                posted = comment_result[:posted]
                skip_status = story_reply_skip_status_for(comment_result)
                capture_task_html(
                  driver: driver,
                  task_name: "home_story_sync_comment_submission",
                  status: posted ? "ok" : "error",
                  meta: {
                    story_ref: ref,
                    comment_preview: comment_text.byteslice(0, 120),
                    posted: posted,
                    submission_method: comment_result[:method],
                    failure_reason: comment_result[:reason],
                    skip_status: skip_status[:status],
                    skip_reason_code: skip_status[:reason_code]
                  }
                )

                if posted
                  stats[:commented] += 1
                  mark_profile_interaction_state!(
                    profile: profile,
                    state: "reply_available",
                    reason: "comment_sent",
                    reaction_available: nil,
                    retry_after_at: nil
                  )
                  profile.record_event!(
                    kind: "story_reply_sent",
                    external_id: "story_reply_sent:#{story_id}",
                    occurred_at: Time.current,
                    metadata: {
                      source: "home_story_carousel",
                      story_id: story_id,
                      story_ref: ref,
                      story_url: story_url,
                      media_url: media[:url],
                      comment_text: comment_text,
                      submission_method: comment_result[:method]
                    }
                  )
                  attach_reply_comment_to_downloaded_event!(downloaded_event: downloaded_event, comment_text: comment_text)
                else
                  profile.record_event!(
                    kind: "story_reply_skipped",
                    external_id: "story_reply_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
                    occurred_at: Time.current,
                    metadata: {
                      source: "home_story_carousel",
                      story_id: story_id,
                      story_ref: ref,
                      story_url: story_url,
                      reason: skip_status[:reason_code],
                      status: skip_status[:status],
                      submission_reason: comment_result[:reason],
                      submission_marker_text: comment_result[:marker_text]
                    }
                  )
                end
              end
            rescue StandardError => e
              stats[:failed] += 1
              profile.record_event!(
                kind: "story_sync_failed",
                external_id: "story_sync_failed:#{story_id}:#{Time.current.utc.iso8601(6)}",
                occurred_at: Time.current,
                metadata: { source: "home_story_carousel", story_id: story_id, story_ref: ref, story_url: story_url, error_class: e.class.name, error_message: e.message }
              )
            end

            moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
            unless moved
              exit_reason = "next_navigation_failed"
              break
            end
          end

          if stats[:stories_visited].zero?
            # The loop ended before a single story was counted: surface it as a failure.
            stats[:failed] += 1
            capture_task_html(
              driver: driver,
              task_name: "home_story_sync_no_progress",
              status: "error",
              meta: {
                reason: "loop_exited_without_story_processing",
                current_url: driver.current_url.to_s,
                page_title: driver.title.to_s,
                stats: stats
              }
            )
            account_profile.record_event!(
              kind: "story_sync_failed",
              external_id: "story_sync_failed:no_progress:#{Time.current.utc.iso8601(6)}",
              occurred_at: Time.current,
              metadata: {
                source: "home_story_carousel",
                reason: "loop_exited_without_story_processing",
                current_url: driver.current_url.to_s,
                page_title: driver.title.to_s
              }
            )
          end

          capture_task_html(
            driver: driver,
            task_name: "home_story_sync_end_state",
            status: "ok",
            meta: {
              reason: exit_reason,
              story_limit: limit,
              stats: stats,
              current_url: driver.current_url.to_s
            }
          )
          account_profile.record_event!(
            kind: "story_sync_completed",
            external_id: "story_sync_completed:home_carousel:#{Time.current.utc.iso8601(6)}",
            occurred_at: Time.current,
            metadata: {
              source: "home_story_carousel",
              story_limit: limit,
              auto_reply_only: tagged_only,
              stats: stats,
              end_reason: exit_reason
            }
          )

          stats
        end
      end
    end
  end
  # Delegates to BulkMessageSendService, passing `usernames` and `message_text`
  # through unchanged. The service is constructed with bound Method objects for
  # this object's session, profile-lookup and DM helpers, keyed by the keyword
  # names the service expects.
  def send_messages!(usernames:, message_text:)
    # service keyword => name of the helper on this object
    helper_names = {
      with_recoverable_session: :with_recoverable_session,
      with_authenticated_driver: :with_authenticated_driver,
      find_profile_for_interaction: :find_profile_for_interaction,
      dm_interaction_retry_pending: :dm_interaction_retry_pending?,
      send_direct_message_via_api: :send_direct_message_via_api!,
      mark_profile_dm_state: :mark_profile_dm_state!,
      apply_dm_state_from_send_result: :apply_dm_state_from_send_result,
      disconnected_session_error: :disconnected_session_error?,
      open_dm: :open_dm,
      send_text_message_from_driver: :send_text_message_from_driver!
    }
    collaborators = helper_names.transform_values { |helper| method(helper) }

    service = BulkMessageSendService.new(**collaborators)
    service.call(usernames: usernames, message_text: message_text)
  end
  # Delegates to SingleMessageSendService, passing `username` and `message_text`
  # through unchanged. The service is constructed with bound Method objects for
  # this object's session, task-capture, profile-lookup and DM helpers, keyed by
  # the keyword names the service expects.
  def send_message_to_user!(username:, message_text:)
    # service keyword => name of the helper on this object
    helper_names = {
      with_recoverable_session: :with_recoverable_session,
      with_authenticated_driver: :with_authenticated_driver,
      with_task_capture: :with_task_capture,
      find_profile_for_interaction: :find_profile_for_interaction,
      dm_interaction_retry_pending: :dm_interaction_retry_pending?,
      send_direct_message_via_api: :send_direct_message_via_api!,
      mark_profile_dm_state: :mark_profile_dm_state!,
      apply_dm_state_from_send_result: :apply_dm_state_from_send_result,
      open_dm: :open_dm,
      send_text_message_from_driver: :send_text_message_from_driver!
    }
    collaborators = helper_names.transform_values { |helper| method(helper) }

    service = SingleMessageSendService.new(**collaborators)
    service.call(username: username, message_text: message_text)
  end
  # API-first text DM. The caller falls back to the UI flow when the result has sent: false.
  #
  # Request pattern mirrors story reply thread usage:
  # 1. resolve user_id from username
  # 2. create/get direct thread id
  # 3. POST /api/v1/direct_v2/threads/broadcast/text/
  #
  # Never raises: any StandardError is turned into a sent: false result whose
  # reason is "api_exception:<ErrorClass>".
  #
  # @param username [Object] recipient; normalized with normalize_username
  # @param message_text [Object] message body; stringified and stripped
  # @return [Hash] always contains :sent, :method ("api") and :reason; the
  #   success and API-failure shapes add api_* diagnostic keys
  def send_direct_message_via_api!(username:, message_text:)
    not_sent = ->(why) { { sent: false, method: "api", reason: why } }

    text = message_text.to_s.strip
    return not_sent.call("blank_message_text") if text.blank?

    uname = normalize_username(username)
    return not_sent.call("blank_username") if uname.blank?

    user_id = story_user_id_for(username: uname)
    return not_sent.call("missing_user_id") if user_id.blank?

    thread_id = direct_thread_id_for_user(user_id: user_id)
    return not_sent.call("missing_thread_id") if thread_id.blank?

    response = ig_api_post_form_json(
      path: "/api/v1/direct_v2/threads/broadcast/text/",
      referer: "#{INSTAGRAM_BASE_URL}/direct/t/#{thread_id}/",
      form: {
        action: "send_item",
        client_context: story_api_client_context,
        thread_id: thread_id,
        text: text
      }
    )
    return not_sent.call("empty_api_response") unless response.is_a?(Hash)

    api_status = response["status"].to_s

    unless api_status == "ok"
      # Use the first non-empty diagnostic the API offers, else a synthetic status reason.
      failure_reason =
        response["message"].to_s.presence ||
        response.dig("payload", "message").to_s.presence ||
        response["error_type"].to_s.presence ||
        "api_status_#{api_status.presence || 'unknown'}"

      return {
        sent: false,
        method: "api",
        reason: failure_reason,
        api_status: api_status.presence || "unknown",
        api_http_status: response["_http_status"],
        api_error_code: response.dig("payload", "error_code").to_s.presence || response["error_code"].to_s.presence
      }
    end

    {
      sent: true,
      method: "api",
      reason: "text_sent",
      api_status: api_status,
      api_thread_id: response.dig("payload", "thread_id").to_s.presence || thread_id,
      api_item_id: response.dig("payload", "item_id").to_s.presence
    }
  rescue StandardError => e
    { sent: false, method: "api", reason: "api_exception:#{e.class.name}" }
  end
  # Posts a comment on a media item, trying the web comment API first (from the
  # browser context) and falling back to typing the comment through the UI.
  #
  # The API-success path leaves the driver block with `next` rather than
  # `return`: a non-local `return` would unwind straight through
  # with_authenticated_driver and skip the account snapshot refresh it performs
  # after the block yields. The UI-fallback path already went through that refresh.
  #
  # @param media_id [Object] media identifier; stringified, must not be blank
  # @param shortcode [Object] post shortcode used to open /p/<shortcode>/; must not be blank
  # @param comment_text [Object] comment body; stringified and stripped, must not be blank
  # @return [Hash] string-keyed result; "method" is "api" or "ui_fallback"
  # @raise [RuntimeError] when an argument is blank, or when both the API call
  #   and the UI fallback fail
  def post_comment_to_media!(media_id:, shortcode:, comment_text:)
    text = comment_text.to_s.strip
    raise "Comment cannot be blank" if text.blank?
    raise "Media id is required to post comment" if media_id.to_s.strip.blank?
    raise "Post shortcode is required" if shortcode.to_s.strip.blank?

    with_recoverable_session(label: "post_comment") do
      with_authenticated_driver do |driver|
        with_task_capture(
          driver: driver,
          task_name: "post_comment_open_post",
          meta: { shortcode: shortcode.to_s, media_id: media_id.to_s }
        ) do
          driver.navigate.to("#{INSTAGRAM_BASE_URL}/p/#{shortcode}/")
          wait_for(driver, css: "body", timeout: 12)
          dismiss_common_overlays!(driver)
        end

        payload = post_comment_via_api_from_browser_context(
          driver: driver,
          media_id: media_id.to_s.strip,
          comment_text: text
        )
        parsed = parse_comment_api_payload(payload)

        # `next` hands the value back to with_authenticated_driver so its
        # post-yield bookkeeping still runs.
        next parsed[:body].merge("method" => "api", "media_id" => media_id.to_s) if parsed[:ok]

        # IG has started rejecting this endpoint on some sessions/builds with 403.
        # Fallback to visible UI interaction to preserve "Forward Post" behavior.
        capture_task_html(
          driver: driver,
          task_name: "post_comment_api_failed_fallback_ui",
          status: "error",
          meta: {
            shortcode: shortcode.to_s,
            media_id: media_id.to_s,
            api_status: parsed[:status],
            api_error: parsed[:error_message],
            api_response_preview: parsed[:response_preview]
          }
        )

        posted = comment_on_post_via_ui!(driver: driver, shortcode: shortcode.to_s, comment_text: text)
        raise "Instagram comment API returned HTTP #{parsed[:status]}; UI fallback also failed" unless posted

        {
          "status" => "ok",
          "method" => "ui_fallback",
          "api_status" => parsed[:status],
          "api_error" => parsed[:error_message],
          "media_id" => media_id.to_s
        }
      end
    end
  end
  # Returns profile-specific mutual friends by using the target user's followers API and
  # keeping only followers the current account already follows.
  #
  # Endpoint:
  #   GET /api/v1/friendships/<target_user_id>/followers/
  #
  # Best-effort: any StandardError is logged as a warning and an empty array is returned.
  #
  # @param profile_username [Object] target profile, forwarded to the API helper
  # @param limit [Object] requested maximum; coerced with to_i and clamped to 1..100
  # @return [Object] the API helper's result, or [] on failure
  def fetch_mutual_friends(profile_username:, limit: 36)
    capped_limit = limit.to_i.clamp(1, 100)

    fetch_mutual_friends_via_api(profile_username: profile_username, limit: capped_limit)
  rescue StandardError => e
    warning = "Instagram fetch_mutual_friends failed for #{profile_username}: #{e.class}: #{e.message}"
    Rails.logger.warn(warning)
    []
  end
  # Opens an authenticated browser inside a recoverable session and reads the
  # profile details for the given username from it.
  #
  # @param username [Object] profile to inspect, forwarded to the driver helper
  # @return [Object] whatever fetch_profile_details_from_driver returns
  def fetch_profile_details!(username:)
    with_recoverable_session(label: "fetch_profile_details") do
      with_authenticated_driver { |browser| fetch_profile_details_from_driver(browser, username: username) }
    end
  end
  # Fetches profile details and merges messaging eligibility into them, all
  # within a single authenticated browser session.
  #
  # Eligibility is asked of the API first; the browser check runs only when the
  # API answer leaves :can_message as nil (undetermined).
  #
  # @param username [Object] profile to inspect
  # @return [Hash] profile details merged with the eligibility hash
  def fetch_profile_details_and_verify_messageability!(username:)
    with_recoverable_session(label: "fetch_profile_details_and_verify_messageability") do
      with_authenticated_driver do |browser|
        profile = fetch_profile_details_from_driver(browser, username: username)

        messageability = verify_messageability_from_api(username: username)
        # nil means the API could not decide; ask the rendered page instead.
        messageability = verify_messageability_from_driver(browser, username: username) if messageability[:can_message].nil?

        profile.merge(messageability)
      end
    end
  end
  # Checks whether the username can be messaged, preferring the API and opening
  # a browser only when the API answer is a Hash whose :can_message is nil.
  #
  # @param username [Object] profile to check
  # @return [Object] the API result when it is conclusive (or not a Hash),
  #   otherwise the browser-based result
  def verify_messageability!(username:)
    with_recoverable_session(label: "verify_messageability") do
      api_answer = verify_messageability_from_api(username: username)
      undetermined = api_answer.is_a?(Hash) && api_answer[:can_message].nil?
      return api_answer unless undetermined

      with_authenticated_driver { |browser| verify_messageability_from_driver(browser, username: username) }
    end
  end
  # Builds the profile analysis dataset by delegating to ProfileAnalysisDatasetService,
  # which is given bound Method objects for the client helpers it needs.
  #
  # @param username [Object] profile to analyse
  # @param posts_limit [Object, nil] forwarded to the service unchanged
  # @param comments_limit [Object] forwarded to the service unchanged (default 8)
  # @return [Object] whatever ProfileAnalysisDatasetService#call returns
  def fetch_profile_analysis_dataset!(username:, posts_limit: nil, comments_limit: 8)
    collaborators = {
      fetch_profile_details: method(:fetch_profile_details!),
      fetch_web_profile_info: method(:fetch_web_profile_info),
      fetch_profile_feed_items_for_analysis: method(:fetch_profile_feed_items_for_analysis),
      extract_post_for_analysis: method(:extract_post_for_analysis),
      enrich_missing_post_comments_via_browser: method(:enrich_missing_post_comments_via_browser!),
      normalize_username: method(:normalize_username)
    }

    ProfileAnalysisDatasetService
      .new(**collaborators)
      .call(username: username, posts_limit: posts_limit, comments_limit: comments_limit)
  end
  # Fetches a profile's feed items, trying the HTTP feed API first and the
  # in-browser fetch second. The first source that yields any items wins.
  #
  # When neither yields items, the HTTP result is returned, annotated with
  # :browser_fallback_attempted and the browser error (if any).
  #
  # @param username [Object] profile username
  # @param user_id [Object] profile user id (also passed to the browser path as a hint)
  # @param posts_limit [Object, nil] maximum number of posts
  # @return [Hash] result hash of the winning source
  def fetch_profile_feed_items_for_analysis(username:, user_id:, posts_limit:)
    via_http = fetch_profile_feed_items_via_http(username: username, user_id: user_id, posts_limit: posts_limit)
    return via_http unless Array(via_http[:items]).empty?

    via_browser = fetch_profile_feed_items_via_browser_context(
      username: username,
      user_id_hint: user_id,
      posts_limit: posts_limit
    )
    return via_browser unless Array(via_browser[:items]).empty?

    fallback_note = {
      browser_fallback_attempted: true,
      browser_fallback_error: via_browser[:error].to_s.presence
    }
    via_http.merge(fallback_note)
  end
  # Pages through the user feed API over HTTP and collects de-duplicated items.
  #
  # Paging stops when any of these holds: PROFILE_FEED_MAX_PAGES pages were
  # fetched, the item budget is used up, a cursor repeats, the response is not
  # a Hash, a page has no Hash items, or no next cursor is returned.
  #
  # Never raises: on StandardError it returns a result with :error set,
  # pages_fetched 0 and no items.
  #
  # @param username [Object] used as referer username for the feed request
  # @param user_id [Object] feed owner; a blank value short-circuits to an empty result
  # @param posts_limit [Object, nil] item cap; blank or non-positive means no cap
  # @return [Hash] :source, :user_id, :pages_fetched, :items and, on the normal
  #   path, :final_max_id and :more_available
  def fetch_profile_feed_items_via_http(username:, user_id:, posts_limit:)
    limit = posts_limit.present? ? posts_limit.to_i : nil
    limit = nil unless limit.to_i.positive?

    if user_id.to_s.blank?
      return { source: "http_feed_api", user_id: nil, pages_fetched: 0, items: [] }
    end

    budget = limit # items still wanted; nil means unlimited
    cursor = nil
    page_count = 0
    collected = []
    visited_cursors = Set.new
    item_keys = Set.new
    has_more = false

    while page_count < PROFILE_FEED_MAX_PAGES
      break if budget && budget <= 0
      # A repeated cursor would loop forever on the same page.
      break if cursor.present? && visited_cursors.include?(cursor)

      visited_cursors << cursor if cursor.present?

      batch_size = budget ? [budget, PROFILE_FEED_PAGE_SIZE].min : PROFILE_FEED_PAGE_SIZE
      feed = fetch_user_feed(user_id: user_id, referer_username: username, count: batch_size, max_id: cursor)
      break unless feed.is_a?(Hash)

      page_items = Array(feed["items"]).grep(Hash)
      break if page_items.empty?

      page_count += 1
      fresh = dedupe_profile_feed_items(items: page_items, seen_keys: item_keys, max_items: budget)
      collected.concat(fresh)
      budget -= fresh.length if budget

      upcoming_cursor = feed["next_max_id"].to_s.strip.presence
      has_more = ActiveModel::Type::Boolean.new.cast(feed["more_available"])
      cursor = upcoming_cursor
      break if cursor.blank?
    end

    {
      source: "http_feed_api",
      user_id: user_id.to_s,
      pages_fetched: page_count,
      final_max_id: cursor,
      more_available: has_more,
      items: limit ? collected.first(limit) : collected
    }
  rescue StandardError => e
    {
      source: "http_feed_api",
      user_id: user_id.to_s.presence,
      pages_fetched: 0,
      error: e.message.to_s,
      items: []
    }
  end
  # Browser fallback for the profile feed: opens the profile page in an
  # authenticated browser and pages through the feed API with `fetch` from
  # inside the page, so requests carry the page's own cookies.
  #
  # The script resolves the user id via web_profile_info when no hint is given,
  # then pages until the item budget, the page cap, an empty page, or a
  # missing/repeated cursor stops it. Items are de-duplicated on the Ruby side.
  #
  # Never raises: on StandardError it returns a result with :error set and no items.
  #
  # @param username [Object] profile username (also the page that is opened)
  # @param user_id_hint [Object] known user id, or blank to let the script resolve it
  # @param posts_limit [Object, nil] item cap; blank or non-positive falls back
  #   to PROFILE_FEED_BROWSER_ITEM_CAP for the in-page fetch
  # @return [Hash] :source, :user_id, :pages_fetched, :final_max_id, :error, :items
  def fetch_profile_feed_items_via_browser_context(username:, user_id_hint:, posts_limit:)
    limit = posts_limit.present? ? posts_limit.to_i : nil
    limit = nil unless limit.to_i.positive?
    max_items = limit || PROFILE_FEED_BROWSER_ITEM_CAP

    script = <<~JS
      const username = String(arguments[0] || "").trim();
      const userIdHint = String(arguments[1] || "").trim();
      const maxItems = Math.max(1, Number(arguments[2] || 0));
      const pageSize = Math.max(1, Number(arguments[3] || 30));
      const maxPages = Math.max(1, Number(arguments[4] || 100));
      const done = arguments[arguments.length - 1];
      const out = {
        source: "browser_feed_api",
        user_id: null,
        pages_fetched: 0,
        final_max_id: null,
        items: [],
        error: null
      };
      const readJson = async (path) => {
        const resp = await fetch(path, {
          method: "GET",
          credentials: "include",
          headers: {
            "Accept": "application/json, text/plain, */*",
            "X-Requested-With": "XMLHttpRequest"
          }
        });
        if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${path}`);
        return await resp.json();
      };
      (async () => {
        try {
          let userId = userIdHint;
          if (!userId) {
            const profile = await readJson(`/api/v1/users/web_profile_info/?username=${encodeURIComponent(username)}`);
            userId = String((profile && profile.data && profile.data.user && profile.data.user.id) || "").trim();
          }
          if (!userId) {
            out.error = "browser_profile_user_id_missing";
            done(out);
            return;
          }
          out.user_id = userId;
          let maxId = "";
          let remaining = maxItems;
          const seenCursors = new Set();
          for (let page = 0; page < maxPages; page += 1) {
            if (remaining <= 0) break;
            if (maxId && seenCursors.has(maxId)) break;
            if (maxId) seenCursors.add(maxId);
            const count = Math.min(pageSize, remaining);
            const query = new URLSearchParams({ count: String(count) });
            if (maxId) query.set("max_id", maxId);
            const feed = await readJson(`/api/v1/feed/user/${encodeURIComponent(userId)}/?${query.toString()}`);
            const pageItems = Array.isArray(feed && feed.items) ? feed.items : [];
            if (pageItems.length === 0) break;
            out.items.push(...pageItems);
            out.pages_fetched += 1;
            remaining -= pageItems.length;
            const nextMaxId = String((feed && feed.next_max_id) || "").trim();
            if (!nextMaxId || nextMaxId === maxId) {
              maxId = nextMaxId;
              break;
            }
            maxId = nextMaxId;
          }
          out.final_max_id = maxId || null;
        } catch (error) {
          out.error = String((error && error.message) || error || "browser_feed_fetch_failed");
        }
        done(out);
      })();
    JS

    with_recoverable_session(label: "profile_analysis_posts_browser_fallback") do
      with_authenticated_driver do |browser|
        browser.navigate.to("#{INSTAGRAM_BASE_URL}/#{username}/")
        wait_for(browser, css: "body", timeout: 10)
        dismiss_common_overlays!(browser)

        raw = browser.execute_async_script(
          script,
          username.to_s,
          user_id_hint.to_s,
          max_items,
          PROFILE_FEED_PAGE_SIZE,
          PROFILE_FEED_MAX_PAGES
        )
        # Anything other than a Hash is treated as an empty payload.
        result = raw.is_a?(Hash) ? raw : {}

        unique_items = dedupe_profile_feed_items(
          items: Array(result["items"]),
          seen_keys: Set.new,
          max_items: limit
        )

        {
          source: result["source"].to_s.presence || "browser_feed_api",
          user_id: result["user_id"].to_s.presence,
          pages_fetched: result["pages_fetched"].to_i,
          final_max_id: result["final_max_id"].to_s.presence,
          error: result["error"].to_s.presence,
          items: unique_items
        }
      end
    end
  rescue StandardError => e
    {
      source: "browser_feed_api",
      user_id: user_id_hint.to_s.presence,
      pages_fetched: 0,
      error: e.message.to_s,
      items: []
    }
  end
  # Filters feed items down to Hashes not seen before, in input order.
  #
  # An item's identity is the first non-empty of "pk", "id", "code",
  # "shortcode"; items with none of those are keyed by the SHA-256 of their JSON.
  # `seen_keys` is mutated so de-duplication carries over between calls.
  #
  # @param items [Object] candidate items; wrapped with Array(), non-Hash entries are skipped
  # @param seen_keys [#include?, #<<] identities already emitted (updated in place)
  # @param max_items [Object, nil] when present, collection stops once this many items are kept
  # @return [Array<Hash>] the newly accepted items
  def dedupe_profile_feed_items(items:, seen_keys:, max_items: nil)
    cap = max_items.present? ? max_items.to_i : nil

    Array(items).each_with_object([]) do |entry, kept|
      next unless entry.is_a?(Hash)

      identity = %w[pk id code shortcode].filter_map { |field| entry[field].to_s.presence }.first
      identity ||= Digest::SHA256.hexdigest(entry.to_json)
      next if identity.blank? || seen_keys.include?(identity)

      seen_keys << identity
      kept << entry
      # The cap is checked after appending, matching the collection order.
      break kept if cap && kept.length >= cap
    end
  end
  # Collects a profile's details together with its currently available stories.
  #
  # The user id comes from the web profile info; the story reel is requested
  # only when that id is present. At most `stories_limit` raw items (clamped to
  # 1..30) are passed to extract_story_item, and nil extractions are dropped.
  #
  # @param username [Object] profile username; normalized first
  # @param stories_limit [Object] maximum raw story items to process (default 20)
  # @return [Hash] :profile, :user_id (nil when unknown), :stories, :fetched_at
  # @raise [RuntimeError] when the normalized username is blank
  def fetch_profile_story_dataset!(username:, stories_limit: 20)
    username = normalize_username(username)
    raise "Username cannot be blank" if username.blank?

    details = fetch_profile_details!(username: username)

    web_info = fetch_web_profile_info(username)
    profile_user = web_info.dig("data", "user") if web_info.is_a?(Hash)
    owner_id = profile_user.is_a?(Hash) ? profile_user["id"].to_s.strip : ""

    reel = fetch_story_reel(user_id: owner_id, referer_username: username) if owner_id.present?
    reel_items = reel.is_a?(Hash) ? Array(reel["items"]) : []

    take = stories_limit.to_i.clamp(1, 30)
    stories = reel_items.first(take).filter_map do |story|
      extract_story_item(story, username: username, reel_owner_id: owner_id)
    end

    {
      profile: details,
      user_id: owner_id.presence,
      stories: stories,
      fetched_at: Time.current
    }
  end
  1445. private
  # Posts a comment through the web comments endpoint using `fetch` inside the
  # already-open page, so the request carries the page's cookies and CSRF token.
  #
  # Changes from the previous version of the script:
  # - X-IG-App-ID no longer comes from the `al:ios:app_store_id` meta tag. That
  #   App Links property holds the iOS App Store listing id, not the web app id.
  #   The id is now read from the same page sources detect_ig_app_id inspects,
  #   with the same default.
  # - The media id is URL-encoded before being placed in the request path.
  #
  # @param driver [Object] driver responding to execute_async_script
  # @param media_id [Object] media identifier; stringified and stripped
  # @param comment_text [Object] comment body; stringified
  # @return [Object] the value passed to the script callback: an object with
  #   ok, status, content_type, body and, on network failure, error
  def post_comment_via_api_from_browser_context(driver:, media_id:, comment_text:)
    script = <<~JS
      const mediaId = arguments[0];
      const comment = arguments[1];
      const done = arguments[arguments.length - 1];
      const body = new URLSearchParams();
      body.set("comment_text", comment);
      const readCookie = (name) => {
        try {
          const cookie = document.cookie || "";
          const parts = cookie.split(";").map((v) => v.trim());
          const hit = parts.find((v) => v.startsWith(name + "="));
          if (!hit) return "";
          return decodeURIComponent(hit.slice(name.length + 1));
        } catch (e) {
          return "";
        }
      };
      const csrf = readCookie("csrftoken");
      const appId = String(
        document.documentElement?.getAttribute("data-app-id") ||
        window._sharedData?.config?.app_id ||
        window.__initialData?.config?.app_id ||
        "936619743392459"
      );
      const rolloutHash =
        window._sharedData?.rollout_hash ||
        window.__initialData?.rollout_hash ||
        "";
      fetch(`/api/v1/web/comments/${encodeURIComponent(mediaId)}/add/`, {
        method: "POST",
        credentials: "include",
        headers: {
          "Accept": "application/json, text/plain, */*",
          "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
          "X-Requested-With": "XMLHttpRequest",
          "X-CSRFToken": csrf,
          "X-IG-App-ID": appId,
          "X-Instagram-AJAX": rolloutHash,
          "Referer": window.location.href
        },
        body: body.toString()
      })
        .then(async (resp) => {
          const textBody = await resp.text();
          done({
            ok: resp.ok,
            status: resp.status,
            content_type: resp.headers.get("content-type") || "",
            body: textBody
          });
        })
        .catch((err) => {
          done({
            ok: false,
            status: 0,
            content_type: "",
            body: "",
            error: String(err)
          });
        });
    JS

    driver.execute_async_script(script, media_id.to_s.strip, comment_text.to_s)
  end
  # Normalizes the raw result of the in-browser comment POST into a uniform hash.
  #
  # Success requires all of: payload["ok"] == true, a content type containing
  # "json", and a JSON object body whose "status" is "ok".
  #
  # A body that fails to parse, or parses to something other than a JSON object
  # (array, null, scalar), is treated as an empty object and reported as
  # status=unknown instead of raising.
  #
  # @param payload [Object] value returned by the browser script; expected to
  #   be a Hash with "ok", "status", "content_type", "body" and optionally "error"
  # @return [Hash] always :ok, :status and :response_preview; :body on success,
  #   :error_message on failure
  def parse_comment_api_payload(payload)
    unless payload.is_a?(Hash)
      return {
        ok: false,
        status: nil,
        error_message: "Unexpected response while posting comment",
        response_preview: payload.to_s.byteslice(0, 500)
      }
    end

    status = payload["status"]
    body_raw = payload["body"].to_s
    ctype = payload["content_type"].to_s
    preview = body_raw.byteslice(0, 900)
    failure = ->(message) { { ok: false, status: status, error_message: message, response_preview: preview } }

    return failure.call(payload["error"].to_s.presence || "Request failed") unless payload["ok"] == true
    return failure.call("Instagram comment API returned non-JSON response") unless ctype.include?("json")

    parsed =
      begin
        JSON.parse(body_raw)
      rescue JSON::ParserError, EncodingError
        nil
      end
    # Only a JSON object can carry a "status" field; anything else counts as empty.
    body = parsed.is_a?(Hash) ? parsed : {}

    body_status = body["status"].to_s
    unless body_status == "ok"
      return failure.call("Instagram comment API returned status=#{body_status.presence || 'unknown'}")
    end

    { ok: true, status: status, body: body, response_preview: preview }
  end
  # Yields a browser that has the stored session applied and has passed the
  # authentication check, then refreshes the stored account snapshot.
  #
  # The snapshot refresh runs only when the block returns normally.
  #
  # @yieldparam browser [Object] the authenticated driver
  # @return [Object] the block's return value
  # @raise [AuthenticationRequiredError] when the account has no stored cookies
  def with_authenticated_driver
    raise AuthenticationRequiredError, "No stored cookies. Use manual login or import cookies first." if @account.cookies.blank?

    with_driver do |browser|
      apply_session_bundle!(browser)
      browser.navigate.to("#{INSTAGRAM_BASE_URL}/")
      ensure_authenticated!(browser)

      yield(browser).tap { refresh_account_snapshot!(browser) }
    end
  end
  # Starts a Chrome WebDriver, yields it, and always quits it afterwards.
  #
  # @param headless [Boolean] defaults to env_headless?
  # @yieldparam browser [Object] the started driver
  # @return [Object] the block's return value
  def with_driver(headless: env_headless?)
    browser = nil
    begin
      browser = Selenium::WebDriver.for(:chrome, options: chrome_options(headless: headless))
      yield(browser)
    ensure
      # browser is still nil when startup itself failed.
      browser&.quit
    end
  end
  # Builds the Chrome options used for every driver this client starts.
  #
  # @param headless [Object] truthy adds --headless=new
  # @return [Selenium::WebDriver::Chrome::Options]
  def chrome_options(headless:)
    options = Selenium::WebDriver::Chrome::Options.new

    flags = %w[
      --window-size=1400,1200
      --disable-notifications
      --disable-dev-shm-usage
      --disable-gpu
      --remote-debugging-pipe
      --no-sandbox
    ]
    flags << "--headless=new" if headless
    flags.each { |flag| options.add_argument(flag) }

    # Enable browser console + performance logs for debugging (captured into our task artifacts when available).
    # Note: ChromeDriver support varies by version; we guard reads in `capture_task_html`.
    options.add_option("goog:loggingPrefs", { browser: "ALL", performance: "ALL" })

    # Allow an opt-in bypass for corp TLS interception setups where the Selenium Chrome instance does not
    # trust the proxy CA. Keep this OFF by default.
    ignore_cert_errors = ActiveModel::Type::Boolean.new.cast(ENV["INSTAGRAM_CHROME_IGNORE_CERT_ERRORS"])
    if ignore_cert_errors
      %w[--ignore-certificate-errors --ignore-ssl-errors=yes].each { |flag| options.add_argument(flag) }
    end

    # Sticky sessions in headless are more reliable when we keep a consistent UA.
    options.add_argument("--user-agent=#{@account.user_agent}") if @account.user_agent.present?

    options
  end
  # Whether drivers should start headless by default.
  #
  # @return [Boolean] true only when the instagram.headless setting is exactly true
  def env_headless?
    configured = Rails.application.config.x.instagram.headless
    configured == true
  end
  # Blocks until the browser holds a "sessionid" cookie, polling once per second.
  #
  # The deadline is measured on the monotonic clock so that wall-clock
  # adjustments (NTP step, DST, manual change) cannot shorten or extend the wait.
  #
  # @param driver [Object] driver whose manage.all_cookies returns hashes with a :name key
  # @param timeout_seconds [Numeric, #to_f] maximum time to wait
  # @return [nil] once the cookie is present
  # @raise [RuntimeError] when the timeout elapses first
  def wait_for_manual_login!(driver:, timeout_seconds:)
    monotonic_now = -> { Process.clock_gettime(Process::CLOCK_MONOTONIC) }
    deadline = monotonic_now.call + timeout_seconds.to_f

    loop do
      cookie_names = driver.manage.all_cookies.map { |c| c[:name] }
      return if cookie_names.include?("sessionid")
      raise "Timed out waiting for manual Instagram login" if monotonic_now.call > deadline

      sleep(1)
    end
  end
  # Copies all browser cookies onto the account (in memory), with every cookie
  # attribute key converted to a String.
  #
  # @param driver [Object] driver whose manage.all_cookies returns an array of hashes
  # @return [Array<Hash>] the assigned cookies
  def persist_cookies!(driver)
    browser_cookies = driver.manage.all_cookies
    @account.cookies = browser_cookies.map do |cookie|
      cookie.transform_keys { |attribute| attribute.to_s }
    end
  end
  # Captures the browser's session state onto the account (in memory only):
  # user agent, cookies, localStorage, sessionStorage, and an auth snapshot
  # summarising what was captured. Saving is left to the caller.
  #
  # Capture after successful 2FA and redirect to authenticated session.
  #
  # @param driver [Object] the browser to read from
  # @return [Hash] the assigned auth snapshot
  def persist_session_bundle!(driver)
    @account.user_agent = safe_driver_value(driver) { driver.execute_script("return navigator.userAgent") }
    persist_cookies!(driver)
    @account.local_storage = read_web_storage(driver, "localStorage")
    @account.session_storage = read_web_storage(driver, "sessionStorage")

    app_id = detect_ig_app_id(driver)

    @account.auth_snapshot = {
      captured_at: Time.current.utc.iso8601(3),
      current_url: safe_driver_value(driver) { driver.current_url },
      page_title: safe_driver_value(driver) { driver.title },
      ig_app_id: app_id,
      # True only for a sessionid cookie that actually carries a value.
      sessionid_present: @account.cookies.any? do |stored|
        stored["name"].to_s == "sessionid" && stored["value"].to_s.present?
      end,
      cookie_names: @account.cookies.map { |stored| stored["name"] }.compact.uniq.sort,
      local_storage_keys: @account.local_storage.map { |entry| entry["key"] }.compact.uniq.sort,
      session_storage_keys: @account.session_storage.map { |entry| entry["key"] }.compact.uniq.sort
    }
  end
  # Re-captures the session bundle from the browser and saves the account when
  # anything changed. Best-effort: failures are logged as a warning, not raised.
  #
  # @param driver [Object] the browser to read from
  # @return [Object] the save! result, nil when unchanged, or the logger result on failure
  def refresh_account_snapshot!(driver)
    persist_session_bundle!(driver)
    return unless @account.changed?

    @account.save!
  rescue StandardError => e
    Rails.logger.warn("Instagram snapshot refresh skipped: #{e.class}: #{e.message}")
  end
  # Restores the stored session into a fresh browser: cookies first, then
  # localStorage and sessionStorage.
  #
  # @param driver [Object] the browser to write into
  # @return [Object] the result of the sessionStorage write
  def apply_session_bundle!(driver)
    # Need a base navigation first so Chrome is on the correct domain for cookies + storage.
    driver.navigate.to(INSTAGRAM_BASE_URL)
    apply_cookies!(driver)

    local_entries = @account.local_storage
    write_web_storage(driver, "localStorage", local_entries)

    session_entries = @account.session_storage
    write_web_storage(driver, "sessionStorage", session_entries)
  end
  # Detects the Instagram web app id from the open page.
  #
  # The page script looks at a data attribute, known globals and web storage and
  # returns the first run of 8+ digits it finds. When nothing is detected, or
  # anything raises, the id stored in the account's auth snapshot is used, and
  # finally a built-in default.
  #
  # @param driver [Object] the browser to inspect
  # @return [String] detected, previously stored, or default app id
  def detect_ig_app_id(driver)
    script = <<~JS
      const candidates = []
      const push = (value) => {
        if (value === null || typeof value === "undefined") return
        const text = String(value)
        const match = text.match(/\\d{8,}/)
        if (match) candidates.push(match[0])
      }
      try { push(document.documentElement?.getAttribute("data-app-id")) } catch (e) {}
      try { push(window._sharedData?.config?.app_id) } catch (e) {}
      try { push(window.__initialData?.config?.app_id) } catch (e) {}
      try { push(window.localStorage?.getItem("ig_app_id")) } catch (e) {}
      try { push(window.localStorage?.getItem("app_id")) } catch (e) {}
      try { push(window.sessionStorage?.getItem("ig_app_id")) } catch (e) {}
      return candidates[0] || null
    JS

    detected = safe_driver_value(driver) { driver.execute_script(script) }.to_s.strip
    detected.empty? ? stored_ig_app_id_or_default : detected
  rescue StandardError
    stored_ig_app_id_or_default
  end

  # App id remembered in the auth snapshot, else the built-in default.
  # Tolerates a missing snapshot and both String and Symbol keys (a snapshot
  # assigned in memory by persist_session_bundle! is symbol-keyed).
  def stored_ig_app_id_or_default
    snapshot = @account.auth_snapshot
    stored = snapshot.is_a?(Hash) ? (snapshot["ig_app_id"] || snapshot[:ig_app_id]).to_s : ""
    stored.strip.empty? ? "936619743392459" : stored
  end
  # Adds every stored cookie that has a name and a value to the browser.
  #
  # Each cookie is handled independently:
  # - if the browser rejects it as unsettable, it is retried once without
  #   :domain and :same_site (host-only or incompatible attributes);
  # - if that retry is rejected too, or the cookie's domain is invalid for the
  #   current page, the cookie is skipped.
  #
  # Previously a failure of the retry escaped the loop and aborted the
  # remaining cookies, because an exception raised inside a `rescue` clause is
  # not handled by its sibling clauses.
  #
  # @param driver [Object] the browser to add cookies to
  # @return [Object] the stored cookie collection that was iterated
  def apply_cookies!(driver)
    driver.navigate.to(INSTAGRAM_BASE_URL)

    @account.cookies.each do |cookie|
      next if cookie["name"].blank? || cookie["value"].blank?

      sanitized_cookie = {
        name: cookie["name"],
        value: cookie["value"],
        path: cookie["path"] || "/",
        secure: bool(cookie["secure"]),
        http_only: bool(cookie["httpOnly"])
      }
      sanitized_cookie[:domain] = cookie["domain"] if cookie["domain"].present?
      sanitized_cookie[:same_site] = normalize_same_site(cookie["sameSite"])

      # Stored cookies may carry either key depending on where they were captured.
      expiry = cookie["expiry"].presence || cookie["expires"].presence
      sanitized_cookie[:expires] = expiry.to_i if expiry

      begin
        driver.manage.add_cookie(sanitized_cookie)
      rescue Selenium::WebDriver::Error::UnableToSetCookieError
        begin
          # Retry without domain/same_site for host-only or incompatible cookie attributes.
          driver.manage.add_cookie(sanitized_cookie.except(:domain, :same_site))
        rescue Selenium::WebDriver::Error::UnableToSetCookieError,
               Selenium::WebDriver::Error::InvalidCookieDomainError
          next
        end
      rescue Selenium::WebDriver::Error::InvalidCookieDomainError
        next
      end
    end
  end
  # Verifies that the browser session is logged in, recording the check as the
  # "auth_validate_session" task.
  #
  # @param driver [Object] the browser to check
  # @raise [AuthenticationRequiredError] when the inbox redirects to the login
  #   page or the page looks logged out
  def ensure_authenticated!(driver)
    with_task_capture(driver: driver, task_name: "auth_validate_session") do
      wait_for(driver, css: "body", timeout: 10)

      # Validate against inbox route because "/" can be public and still unauthenticated.
      driver.navigate.to("#{INSTAGRAM_BASE_URL}/direct/inbox/")
      wait_for(driver, css: "body", timeout: 10)

      sent_to_login = driver.current_url.include?("/accounts/login")
      if sent_to_login || logged_out_page?(driver)
        raise AuthenticationRequiredError,
              "Stored cookies are not authenticated. Re-run Manual Browser Login or import fresh cookies."
      end
    end
  end
  # Collects the users the account has conversations with.
  #
  # The direct-inbox API is tried first (limit 120). When it yields nothing, the
  # inbox page is opened and its HTML is scanned eight times, scrolling between
  # scans so more rows get rendered. Counters describing the extraction are
  # written into the task-capture meta hash.
  #
  # @param driver [Object] authenticated browser
  # @return [Object] the API result when present, otherwise a Hash of
  #   username => attributes (first occurrence wins)
  def collect_conversation_users(driver)
    meta = { extraction: "inbox_page_source_verify_contact_row_exists" }

    with_task_capture(driver: driver, task_name: "sync_collect_conversation_users", meta: meta) do
      from_api = fetch_conversation_users_via_api(limit: 120)
      if from_api.present?
        meta[:source] = "api_direct_inbox"
        meta[:unique_usernames] = from_api.length
        return from_api
      end

      meta[:source] = "html_fallback"
      found = {}

      driver.navigate.to("#{INSTAGRAM_BASE_URL}/direct/inbox/")
      wait_for(driver, css: "body", timeout: 10)

      # Inbox content is often rendered via large JSON payloads; wait for those to exist.
      payload_markers = %w[verifyContactRowExists LSVerifyContactRowExists]
      Selenium::WebDriver::Wait.new(timeout: 10).until do
        payload_markers.any? { |marker| driver.page_source.to_s.include?(marker) }
      end

      segment_count = 0
      extracted_count = 0

      8.times do
        extracted, segments = extract_conversation_users_from_inbox_html(driver.page_source.to_s)
        segment_count += segments
        extracted_count += extracted.length
        extracted.each { |username, attrs| found[username] ||= attrs }

        # Inbox uses a nested scroller in many builds; try to scroll that first.
        driver.execute_script(<<~JS)
          const candidate =
            document.querySelector("div[role='main']") ||
            document.querySelector("div[role='grid']") ||
            document.scrollingElement ||
            document.documentElement ||
            document.body;
          try { candidate.scrollTop = (candidate.scrollTop || 0) + 750; } catch (e) {}
          try { window.scrollBy(0, 750); } catch (e) {}
        JS
        sleep(0.4)
      end

      meta[:verify_contact_row_segments] = segment_count
      meta[:extracted_usernames_total] = extracted_count
      meta[:unique_usernames] = found.length
      found
    end
  end
  # Collects the usernames that currently have stories on the home page.
  #
  # Sources, in order:
  # 1. the reels-tray API (returned as-is when it yields anything);
  # 2. usernames extracted from the home page HTML;
  # 3. only when (2) found nothing: /stories/ anchors in the DOM, then a regex
  #    scan of the page source.
  # Counters for each stage are written into the task-capture meta hash.
  #
  # @param driver [Object] authenticated browser
  # @return [Object] the API result when present, otherwise a Hash of
  #   username => { display_name: username }
  def collect_story_users(driver)
    meta = { extraction: "home_stories_anchors_and_regex" }

    with_task_capture(driver: driver, task_name: "sync_collect_story_users", meta: meta) do
      from_api = fetch_story_users_via_api
      if from_api.present?
        meta[:source] = "api_reels_tray"
        meta[:unique_story_usernames] = from_api.length
        return from_api
      end

      meta[:source] = "html_fallback"
      found = {}
      remember = ->(name) { found[name] ||= { display_name: name } }

      driver.navigate.to(INSTAGRAM_BASE_URL)
      wait_for(driver, css: "body", timeout: 10)
      dismiss_common_overlays!(driver)

      prefetched = extract_story_users_from_home_html(driver.page_source.to_s)
      meta[:story_prefetch_usernames] = prefetched.length
      prefetched.each { |name| remember.call(name) }

      if found.any?
        meta[:story_anchor_hrefs] = 0
        meta[:story_regex_usernames] = 0
      else
        # If we didn't get anything from prefetched query payloads, try DOM anchors as a fallback.
        anchor_selector = "a[href*='/stories/']"
        begin
          Selenium::WebDriver::Wait.new(timeout: 12).until do
            driver.find_elements(css: anchor_selector).any?
          end
        rescue Selenium::WebDriver::Error::TimeoutError
          meta[:story_anchor_wait_timed_out] = true
        end

        hrefs = driver.find_elements(css: anchor_selector)
                      .map { |anchor| anchor.attribute("href").to_s }
                      .reject(&:blank?)
        meta[:story_anchor_hrefs] = hrefs.length
        hrefs.each do |href|
          name = normalize_username(href.split("/stories/").last.to_s.split("/").first.to_s)
          next if name.blank?

          remember.call(name)
        end

        # Fallback: parse the page source for story links even if anchors use different tag/attrs.
        page_html = driver.page_source.to_s
        regex_names = page_html.scan(%r{/stories/([A-Za-z0-9._]{1,30})/})
                               .flatten
                               .map { |raw| normalize_username(raw) }
                               .reject(&:blank?)
                               .uniq
        meta[:story_regex_usernames] = regex_names.length
        regex_names.each { |name| remember.call(name) }
      end

      meta[:unique_story_usernames] = found.length
      found
    end
  end
  # Returns home-feed items as an Array of Hashes with the keys :shortcode, :post_kind,
  # :author_username, :media_url, :caption and :metadata.
  #
  # Whatever fetch_home_feed_items_via_api returns is handed back untouched when it is
  # present. Otherwise the page currently loaded in +driver+ is scraped (at most 60 items).
  # Any StandardError raised along the way yields an empty Array.
  def extract_feed_items_from_dom(driver)
    from_api = fetch_home_feed_items_via_api(limit: 50)
    return from_api if from_api.present?

    # Feed markup is unstable, so the script keys off link patterns (/p/ and /reel/) only.
    scraped = driver.execute_script(<<~JS)
      const out = [];
      const uniq = new Set();
      const linkEls = Array.from(document.querySelectorAll("a[href^='/p/'], a[href^='/reel/']"));
      for (const a of linkEls) {
        const href = (a.getAttribute("href") || "").trim();
        if (!href) continue;
        const parts = href.split("/");
        // /p/<shortcode>/...
        const idx = parts.findIndex((p) => p === "p" || p === "reel");
        if (idx < 0 || !parts[idx + 1]) continue;
        const kind = parts[idx];
        const shortcode = parts[idx + 1];
        if (!shortcode || uniq.has(shortcode)) continue;
        uniq.add(shortcode);
        // Try to find a nearby article container for metadata.
        let node = a;
        for (let j = 0; j < 8; j++) {
          if (!node) break;
          if (node.tagName && node.tagName.toLowerCase() === "article") break;
          node = node.parentElement;
        }
        const container = node && node.tagName && node.tagName.toLowerCase() === "article" ? node : a.closest("article") || a.parentElement;
        // Author username: attempt to find a link that looks like /username/
        let author = null;
        if (container) {
          const authorLink = Array.from(container.querySelectorAll("a[href^='/']")).find((x) => {
            const h = (x.getAttribute("href") || "").trim();
            if (!h) return false;
            if (h.startsWith("/p/") || h.startsWith("/reel/") || h.startsWith("/stories/") || h.startsWith("/explore/") || h.startsWith("/direct/")) return false;
            const seg = h.split("/").filter(Boolean)[0];
            return seg && seg.length <= 30 && /^[A-Za-z0-9._]+$/.test(seg);
          });
          if (authorLink) {
            const h = (authorLink.getAttribute("href") || "").trim();
            author = h.split("/").filter(Boolean)[0] || null;
          }
        }
        // Media URL: prefer the first visible img.
        let mediaUrl = null;
        let naturalWidth = null;
        let naturalHeight = null;
        if (container) {
          const img = Array.from(container.querySelectorAll("img")).find((img) => {
            const r = img.getBoundingClientRect();
            return r.width > 80 && r.height > 80;
          });
          if (img) {
            mediaUrl = img.currentSrc || img.getAttribute("src") || null;
            naturalWidth = Number(img.naturalWidth || 0) || null;
            naturalHeight = Number(img.naturalHeight || 0) || null;
          }
        }
        out.push({
          shortcode,
          post_kind: kind === "reel" ? "reel" : "post",
          author_username: author,
          media_url: mediaUrl,
          caption: null,
          metadata: { href, natural_width: naturalWidth, natural_height: naturalHeight }
        });
      }
      return out.slice(0, 60);
    JS

    # Convert the string-keyed script result into the symbol-keyed shape used by callers.
    scraped.map do |row|
      {
        shortcode: row["shortcode"],
        post_kind: row["post_kind"],
        author_username: normalize_username(row["author_username"].to_s),
        media_url: row["media_url"].to_s,
        caption: row["caption"],
        metadata: row["metadata"] || {}
      }
    end
  rescue StandardError
    []
  end
  # Collects the followers or following list of +profile_username+.
  #
  # The friendships API (fetch_follow_list_via_api) is tried first. When it yields nothing,
  # the list dialog is opened in the browser (direct URL first, then by clicking the link on
  # the profile page) and scrolled until the list stops growing.
  #
  # driver           - Selenium driver used for the HTML fallback.
  # list_kind        - :followers or :following. Strings are accepted and normalized to a
  #                    Symbol; anything other than :followers is treated as :following.
  # profile_username - username whose list is collected.
  #
  # Returns the API result directly when it is present; otherwise the block passed to
  # with_task_capture evaluates to a Hash of username => { display_name:, profile_pic_url: }
  # (presumably with_task_capture returns the block value - confirm against its definition).
  # Raises RuntimeError when the list link cannot be clicked on the profile page.
  def collect_follow_list(driver, list_kind:, profile_username:)
    # Normalize once so a String list_kind selects the same branch here as it does in
    # fetch_follow_list_via_api, which already calls to_sym.
    kind = list_kind.to_s.to_sym
    followers = (kind == :followers)
    meta = { list_kind: list_kind.to_s, profile_username: profile_username }

    with_task_capture(driver: driver, task_name: "sync_collect_#{list_kind}", meta: meta) do
      api_users = fetch_follow_list_via_api(profile_username: profile_username, list_kind: kind)
      if api_users.present?
        meta[:source] = "api_friendships"
        meta[:unique_usernames] = api_users.length
        return api_users
      end

      meta[:source] = "html_fallback"
      list_path = followers ? "followers" : "following"
      list_url = "#{INSTAGRAM_BASE_URL}/#{profile_username}/#{list_path}/"
      profile_url = "#{INSTAGRAM_BASE_URL}/#{profile_username}/"
      meta[:list_url] = list_url
      meta[:profile_url] = profile_url

      dialog =
        begin
          meta[:open_strategy] = "direct_url"
          driver.navigate.to(list_url)
          wait_for(driver, css: "body", timeout: 12)
          dismiss_common_overlays!(driver)
          wait_for(driver, css: "div[role='dialog']", timeout: 12)
        rescue Selenium::WebDriver::Error::TimeoutError
          nil
        end

      unless dialog
        # Fallback for builds that don't open the modal on the /followers/ route until after profile renders.
        meta[:open_strategy] = "profile_click_fallback"
        driver.navigate.to(profile_url)
        wait_for(driver, css: "body", timeout: 12)
        dismiss_common_overlays!(driver)
        href_fragment = "/#{list_path}/"

        # Some profiles render counts lazily; wait briefly for the link to appear.
        begin
          Selenium::WebDriver::Wait.new(timeout: 12).until do
            driver.execute_script(<<~JS, href_fragment)
              const frag = arguments[0];
              const els = Array.from(document.querySelectorAll("a[href]"));
              return els.some((a) => (a.getAttribute("href") || "").includes(frag));
            JS
          end
        rescue Selenium::WebDriver::Error::TimeoutError
          nil
        end

        # Retry the click a few times; the link may be re-rendered between attempts.
        clicked = false
        attempts = 0
        8.times do
          attempts += 1
          begin
            clicked = driver.execute_script(<<~JS, href_fragment)
              const frag = arguments[0];
              const candidates = Array.from(document.querySelectorAll(`a[href*="${frag}"]`));
              if (!candidates.length) return false;
              const isVisible = (el) => {
                const r = el.getBoundingClientRect();
                const cs = window.getComputedStyle(el);
                return cs && cs.visibility !== "hidden" && cs.display !== "none" && r.width > 0 && r.height > 0;
              };
              const link = candidates.find(isVisible) || candidates[0];
              try { link.scrollIntoView({block: "center", inline: "nearest"}); } catch (e) {}
              try { link.click(); return true; } catch (e) {}
              try { link.dispatchEvent(new MouseEvent("click", {bubbles: true, cancelable: true, view: window})); return true; } catch (e) {}
              return false;
            JS
          rescue Selenium::WebDriver::Error::StaleElementReferenceError,
                 Selenium::WebDriver::Error::JavascriptError,
                 Selenium::WebDriver::Error::ElementClickInterceptedError,
                 Selenium::WebDriver::Error::ElementNotInteractableError
            clicked = false
          end
          break if clicked
          sleep(0.35)
        end
        meta[:profile_link_click_attempts] = attempts
        raise "Unable to find #{list_kind} link on profile" unless clicked

        # Not rescued here: a failure to find the dialog after the click propagates.
        wait_for(driver, css: "div[role='dialog']", timeout: 12)
      end

      if (counts = extract_profile_follow_counts(driver.page_source.to_s))
        meta[:expected_followers] = counts[:followers]
        meta[:expected_following] = counts[:following]
        meta[:expected_count] = followers ? counts[:followers] : counts[:following]
      end

      # The dialog often opens in a skeleton/loading state; if we start extracting immediately we'll
      # see 0 usernames and prematurely terminate. Wait briefly for at least one profile row anchor.
      begin
        Selenium::WebDriver::Wait.new(timeout: 20).until do
          driver.execute_script(<<~'JS')
            const dialog = document.querySelector("div[role='dialog']");
            if (!dialog) return false;
            const anchors = Array.from(dialog.querySelectorAll("a[href^='/']"));
            return anchors.some((a) => {
              const href = (a.getAttribute("href") || "").trim();
              return /^\/[A-Za-z0-9._]{1,30}\/(?:\?.*)?$/.test(href);
            });
          JS
        end
      rescue Selenium::WebDriver::Error::TimeoutError
        # We'll still attempt extraction; capture will show the loading state HTML.
      end

      users = {}
      stable_rounds = 0   # consecutive rounds without a new username
      last_count = 0
      stuck_rounds = 0    # consecutive rounds in which scrollTop did not advance
      last_scroll_top = nil
      max_rounds = (kind == :following) ? 750 : 260
      # meta[:expected_count] is only written above, so it is constant for the whole loop.
      expected_count = meta[:expected_count].to_i

      max_rounds.times do
        payload = driver.execute_script(<<~'JS')
          const dialog = document.querySelector("div[role='dialog']");
          if (!dialog) return { out: [], scrolled: false, dialog_found: false };
          const out = [];
          // Common non-profile routes that can appear in dialogs.
          const reserved = new Set(["accounts","explore","direct","p","reel","reels","stories","about","privacy","terms"]);
          const anchors = Array.from(dialog.querySelectorAll("a[href^='/']"));
          for (const a of anchors) {
            const href = (a.getAttribute("href") || "").trim();
            const m = href.match(/^\/([A-Za-z0-9._]{1,30})\/(?:\?.*)?$/);
            if (!m) continue;
            const username = (m[1] || "").toLowerCase();
            if (!username) continue;
            if (reserved.has(username)) continue;
            const row = a.closest("div");
            const img = row ? row.querySelector("img") : null;
            const pic = img ? (img.getAttribute("src") || "") : "";
            const alt = img ? (img.getAttribute("alt") || "") : "";
            // Display name is often in a sibling span; best-effort only.
            let display = "";
            if (row) {
              const spans = Array.from(row.querySelectorAll("span")).map((s) => (s.textContent || "").trim()).filter(Boolean);
              // Username is typically present; choose a non-username candidate if possible.
              display = spans.find((t) => t.toLowerCase() !== username) || "";
            }
            if (!display && alt) {
              // Common patterns: "Full Name's profile picture" or "Profile picture"
              const cleaned = alt
                .replace(/'s profile picture/gi, "")
                .replace(/profile picture/gi, "")
                .trim();
              if (cleaned && cleaned.toLowerCase() !== username) display = cleaned;
            }
            out.push({ username: username, display_name: display, profile_pic_url: pic });
          }
          // Scroll the modal list to load more entries.
          // IG sometimes places the actual scroll container on a nested node, and not always a div.
          // Choose the scrollable element with the largest scrollHeight; the number of
          // profile-link anchors it contains only breaks ties.
          const allNodes = Array.from(dialog.querySelectorAll("*"));
          const scrollables = allNodes.filter((el) => {
            try { return (el.scrollHeight - el.clientHeight) > 180; } catch (e) { return false; }
          });
          const scoreScroller = (el) => {
            let links = 0;
            try {
              const anchors = Array.from(el.querySelectorAll("a[href^='/']"));
              for (const a of anchors) {
                const href = (a.getAttribute("href") || "").trim();
                if (/^\/[A-Za-z0-9._]{1,30}\/(?:\?.*)?$/.test(href)) links += 1;
              }
            } catch (e) {}
            let sh = 0;
            try { sh = el.scrollHeight || 0; } catch (e) {}
            return { links: links, sh: sh };
          };
          let scroller = null;
          let best = { links: -1, sh: -1 };
          for (const el of scrollables) {
            const s = scoreScroller(el);
            // Prefer the largest scrollHeight; it tends to represent the "true" list scroller.
            if (s.sh > best.sh || (s.sh === best.sh && s.links > best.links)) {
              best = s;
              scroller = el;
            }
          }
          scroller = scroller || dialog;
          let before = 0;
          try { before = scroller.scrollTop || 0; } catch (e) {}
          try { scroller.scrollTop = before + scroller.clientHeight * 0.95; } catch (e) {}
          // If the computed scroller doesn't move, try a scrollBy fallback.
          try {
            if ((scroller.scrollTop || 0) === before) scroller.scrollBy(0, Math.max(120, scroller.clientHeight || 0));
          } catch (e) {}
          let after = before;
          let sh = 0;
          let ch = 0;
          try { after = scroller.scrollTop || after; } catch (e) {}
          try { sh = scroller.scrollHeight || 0; } catch (e) {}
          try { ch = scroller.clientHeight || 0; } catch (e) {}
          const at_end = (ch > 0) ? ((after + ch) >= (sh - 4)) : false;
          const did_scroll = after !== before;
          const loading = !!dialog.querySelector("[role='progressbar'], svg[aria-label='Loading...'], div[data-visualcompletion='loading-state']");
          return {
            out: out,
            scrolled: true,
            dialog_found: true,
            scroll_top: after,
            scroll_height: sh,
            client_height: ch,
            at_end: at_end,
            did_scroll: did_scroll,
            scroller_score: best,
            scrollers_seen: scrollables.length,
            loading: loading
          };
        JS

        # If the modal was replaced/closed due to navigation, stop early.
        break unless payload.is_a?(Hash) && (payload["dialog_found"] == true || payload[:dialog_found] == true)

        batch = payload["out"] || payload[:out] || []
        at_end = payload["at_end"] == true || payload[:at_end] == true
        did_scroll = payload["did_scroll"] == true || payload[:did_scroll] == true
        loading = payload["loading"] == true || payload[:loading] == true
        scroll_top = payload["scroll_top"] || payload[:scroll_top]
        scroller_score = payload["scroller_score"] || payload[:scroller_score]
        scrollers_seen = payload["scrollers_seen"] || payload[:scrollers_seen]

        # First sighting wins; later rounds never overwrite an already-collected user.
        Array(batch).each do |entry|
          u = normalize_username(entry["username"] || entry[:username])
          next if u.blank?
          users[u] ||= {
            display_name: (entry["display_name"] || entry[:display_name]).presence,
            profile_pic_url: (entry["profile_pic_url"] || entry[:profile_pic_url]).presence
          }
        end

        if users.length == last_count
          stable_rounds += 1
        else
          stable_rounds = 0
          last_count = users.length
        end

        if scroll_top
          if last_scroll_top && scroll_top.to_f <= (last_scroll_top.to_f + 1.0)
            stuck_rounds += 1
          else
            stuck_rounds = 0
          end
          last_scroll_top = scroll_top
        end

        meta[:scroll_top] = scroll_top
        meta[:scroll_stuck_rounds] = stuck_rounds
        meta[:stable_rounds] = stable_rounds
        meta[:at_end] = at_end
        meta[:did_scroll] = did_scroll
        meta[:loading] = loading
        meta[:scroller_score] = scroller_score if scroller_score
        meta[:scrollers_seen] = scrollers_seen if scrollers_seen

        break if expected_count.positive? && users.length >= expected_count

        # If the modal is still loading and we haven't found anyone yet, keep waiting instead of
        # tripping the stable_rounds safety breaker.
        if users.empty? && loading
          stable_rounds = 0
          sleep(0.75)
          next
        end

        # If we never actually scroll, IG likely swapped/locked the scroll container.
        # Reset stable counter to allow more time and let subsequent iterations re-select the scroller.
        stable_rounds = 0 if !did_scroll && stable_rounds < 4

        # Break only once we hit the end of the scroll region and nothing new has loaded for a bit.
        far_from_expected =
          expected_count.positive? && users.length < (expected_count * 0.98).floor
        break if at_end && stable_rounds >= 3 && !far_from_expected
        break if (stuck_rounds >= 25) && !far_from_expected
        break if (stable_rounds >= 60) && !far_from_expected

        # Back off progressively while the list is loading or has stopped growing.
        pause =
          if loading
            0.8
          elsif stable_rounds >= 10
            1.15
          elsif stable_rounds >= 3
            0.8
          else
            0.4
          end
        sleep(pause)
      end

      meta[:unique_usernames] = users.length

      # Best-effort: close the dialog; failures here must not lose the collected users.
      begin
        driver.action.send_keys(:escape).perform
      rescue StandardError
        nil
      end

      users
    end
  end
  # Creates or updates one instagram_profiles record on @account per entry of +users_hash+.
  #
  # users_hash       - Hash of username => attrs, where attrs may carry :profile_pic_url and
  #                    :display_name.
  # following_flag   - when truthy, marks every profile as followed by the account.
  # follows_you_flag - when truthy, marks every profile as following the account.
  #
  # Blank incoming values never overwrite stored ones, and the flags are only ever set to
  # true (never cleared). Uses save!, so a failing save raises.
  def upsert_follow_list!(users_hash, following_flag:, follows_you_flag:)
    synced_at = Time.current

    users_hash.each do |username, attrs|
      record = @account.instagram_profiles.find_or_initialize_by(username: username)

      # Keep whatever is already stored unless a non-blank replacement arrived.
      picture = attrs.dig(:profile_pic_url).presence
      record.profile_pic_url = picture if picture
      label = attrs.dig(:display_name).presence
      record.display_name = label if label

      record.following = true if following_flag
      record.follows_you = true if follows_you_flag
      record.last_synced_at = synced_at
      record.save!
    end
  end
  # Determines whether +username+ can be messaged from the current account.
  #
  # A conclusive answer from verify_messageability_from_api (non-nil :can_message) is
  # returned immediately, tagged with source: "api". Otherwise the profile page is loaded in
  # +driver+ and the result is inferred from the visible Message / Follow / Requested
  # buttons and from the page text.
  #
  # The result Hash always carries :can_message and :restriction_reason; the API path adds
  # :source, :dm_state, :dm_reason and :dm_retry_after_at.
  def fetch_eligibility(driver, username)
    with_task_capture(driver: driver, task_name: "sync_fetch_eligibility", meta: { username: username }) do
      api_result = verify_messageability_from_api(username: username)
      api_is_conclusive = api_result.is_a?(Hash) && !api_result[:can_message].nil?
      if api_is_conclusive
        return {
          can_message: api_result[:can_message],
          restriction_reason: api_result[:restriction_reason],
          source: "api",
          dm_state: api_result[:dm_state],
          dm_reason: api_result[:dm_reason],
          dm_retry_after_at: api_result[:dm_retry_after_at]
        }
      end

      driver.navigate.to("#{INSTAGRAM_BASE_URL}/#{username}/")
      wait_for(driver, css: "body", timeout: 8)
      page = driver.page_source.to_s
      page_down = page.downcase

      # If we hit a generic error page or an interstitial, eligibility is unknown.
      error_markers = [ "something went wrong", "unexpected error", "polarishttp500", "try again later" ]
      if error_markers.any? { |marker| page_down.include?(marker) }
        return { can_message: false, restriction_reason: "Unable to verify messaging availability (profile load error)" }
      end

      # Buttons often render as <div role="button"> on modern IG builds (not only <button>).
      button_labelled = lambda do |label|
        driver.find_elements(xpath: "//*[self::button or (self::div and @role='button')][normalize-space()='#{label}']").first
      end

      message_cta =
        button_labelled.call("Message") ||
        driver.find_elements(xpath: "//*[self::a and @role='link' and normalize-space()='Message']").first
      follow_cta = button_labelled.call("Follow") || button_labelled.call("Requested")

      can_message = false
      reason =
        if message_cta
          can_message = true
          nil
        elsif follow_cta
          "User is not currently messageable from this account"
        elsif page_down.include?("private")
          "Private or restricted profile"
        else
          "Unable to verify messaging availability"
        end

      { can_message: can_message, restriction_reason: reason }
    end
  end
  # Gathers profile details for +username+.
  #
  # The result of fetch_profile_details_via_api is returned directly when present. Otherwise
  # the profile page is loaded in +driver+ and the details are assembled from the og: meta
  # tags, fetch_web_profile_info and the latest-post extractors (DOM, then page HTML, then
  # HTTP).
  #
  # Returns a Hash with :username, :display_name, :profile_pic_url, :ig_user_id, :bio,
  # :followers_count, :category_name, :is_business_account, :last_post_at and
  # :latest_post_shortcode.
  # Raises RuntimeError when the normalized username is blank.
  def fetch_profile_details_from_driver(driver, username:)
    username = normalize_username(username)
    raise "Username cannot be blank" if username.blank?

    with_task_capture(driver: driver, task_name: "profile_fetch_details", meta: { username: username }) do
      api_details = fetch_profile_details_via_api(username)
      return api_details if api_details.present?

      driver.navigate.to("#{INSTAGRAM_BASE_URL}/#{username}/")
      wait_for(driver, css: "body", timeout: 10)
      dismiss_common_overlays!(driver)
      html = driver.page_source.to_s

      # og:title examples: "Name (@username) • Instagram photos and videos"
      og_name = nil
      title_match = html.match(/property=\"og:title\" content=\"([^\"]+)\"/)
      if title_match
        og_title = CGI.unescapeHTML(title_match[1].to_s)
        name_match = og_title.match(/\A(.+?)\s*\(@#{Regexp.escape(username)}\)\b/)
        og_name = name_match[1].to_s.strip if name_match
      end

      image_match = html.match(/property=\"og:image\" content=\"([^\"]+)\"/)
      pic = image_match ? CGI.unescapeHTML(image_match[1].to_s).strip : nil

      # Collapse "not a Hash" to nil once so each field below needs a single check.
      web_info = fetch_web_profile_info(username)
      web_user = web_info.is_a?(Hash) ? web_info.dig("data", "user") : nil
      web_user = nil unless web_user.is_a?(Hash)

      ig_user_id = web_user && web_user["id"].to_s.strip.presence
      bio = web_user && web_user["biography"].to_s.presence
      full_name = web_user && web_user["full_name"].to_s.strip.presence
      followers_count = web_user ? normalize_count(web_user["follower_count"]) : nil
      followers_count ||= extract_profile_follow_counts(html)&.dig(:followers)
      category_name = web_user && web_user["category_name"].to_s.strip.presence
      is_business_account =
        web_user ? ActiveModel::Type::Boolean.new.cast(web_user["is_business_account"]) : nil

      # Try progressively cheaper/less precise sources until one yields a post.
      post = extract_latest_post_from_profile_dom(driver)
      post = extract_latest_post_from_profile_html(html) if post[:taken_at].blank? && post[:shortcode].blank?
      post = extract_latest_post_from_profile_http(username) if post[:taken_at].blank? && post[:shortcode].blank?

      {
        username: username,
        display_name: og_name || full_name,
        profile_pic_url: pic,
        ig_user_id: ig_user_id,
        bio: bio,
        followers_count: followers_count,
        category_name: category_name,
        is_business_account: is_business_account,
        last_post_at: post[:taken_at],
        latest_post_shortcode: post[:shortcode]
      }
    end
  end
  # Extracts the first post timestamp and shortcode found in a profile page's HTML.
  #
  # html - the page source as a String (nil/blank is accepted).
  #
  # Returns { taken_at: Time (UTC) or nil, shortcode: String or nil }. Never raises: any
  # StandardError yields both values as nil.
  def extract_latest_post_from_profile_html(html)
    return { taken_at: nil, shortcode: nil } if html.blank?

    # Prefer restricting our search to a window around the timeline media payload to avoid
    # grabbing unrelated timestamps elsewhere in the page.
    idx = html.index("edge_owner_to_timeline_media") || html.index("timeline_media")
    # String#index returns a character index, so slice by characters as well. A byte-based
    # slice would start at the wrong offset on multibyte pages and could split a UTF-8
    # character, which makes the regex matches below raise.
    window = idx ? (html[idx, 250_000] || "") : html

    taken_at = nil
    if (m = window.match(/\"taken_at_timestamp\":(\d{9,})/))
      taken_at =
        begin
          Time.at(m[1].to_i).utc
        rescue StandardError
          # An unusable timestamp must not prevent the shortcode from being returned.
          nil
        end
    end

    shortcode = window[/\"shortcode\":\"([A-Za-z0-9_-]{5,})\"/, 1]

    { taken_at: taken_at, shortcode: shortcode }
  rescue StandardError
    { taken_at: nil, shortcode: nil }
  end
  # Finds the first displayed post/reel link on the profile page currently loaded in
  # +driver+, opens it, and reads the post time from the first time[datetime] element.
  #
  # The block evaluates to { taken_at: Time (UTC) or nil, shortcode: String or nil }; both
  # are nil when no link is displayed or when any StandardError (including a Selenium
  # timeout) occurs.
  def extract_latest_post_from_profile_dom(driver)
    with_task_capture(driver: driver, task_name: "profile_latest_post_dom") do
      begin
        wait_for(driver, css: "body", timeout: 6)
        dismiss_common_overlays!(driver)

        # Wait for the grid to hydrate (Instagram often renders posts after JS loads).
        begin
          Selenium::WebDriver::Wait.new(timeout: 12).until do
            driver.find_elements(css: "article a[href^='/p/'], article a[href^='/reel/']").any? ||
              driver.page_source.to_s.include?("No posts yet") ||
              driver.page_source.to_s.include?("This Account is Private")
          end
        rescue Selenium::WebDriver::Error::TimeoutError
          nil
        end

        # Selectors are tried lazily, from most to least specific.
        first_displayed = ->(selector) { driver.find_elements(css: selector).find(&:displayed?) }
        link =
          first_displayed.call("article a[href^='/p/']") ||
          first_displayed.call("article a[href^='/reel/']") ||
          first_displayed.call("a[href^='/p/']") ||
          first_displayed.call("a[href^='/reel/']")
        next({ taken_at: nil, shortcode: nil }) unless link

        href = link.attribute("href").to_s
        route = [ "/p/", "/reel/" ].find { |segment| href.include?(segment) }
        shortcode = route ? href.split(route).last.to_s.split("/").first.to_s : nil

        driver.execute_script("arguments[0].click()", link)
        time_el = wait_for(driver, css: "time[datetime]", timeout: 10)
        stamp = time_el.attribute("datetime").to_s
        taken_at =
          begin
            Time.iso8601(stamp).utc
          rescue StandardError
            Time.parse(stamp).utc
          end

        # Best-effort: close the post view again.
        begin
          driver.action.send_keys(:escape).perform
        rescue StandardError
          nil
        end

        { taken_at: taken_at, shortcode: shortcode.presence }
      rescue StandardError
        { taken_at: nil, shortcode: nil }
      end
    end
  end
  # Looks up the most recent post of +username+ over HTTP, without the browser.
  #
  # The first timeline (or video timeline) edge from fetch_web_profile_info is used when
  # there is one; otherwise the first item of fetch_user_feed is used.
  #
  # Returns { taken_at: Time (UTC) or nil, shortcode: String or nil }. Never raises: any
  # StandardError yields both values as nil.
  def extract_latest_post_from_profile_http(username)
    username = normalize_username(username)
    return { taken_at: nil, shortcode: nil } if username.blank?

    data = fetch_web_profile_info(username)
    user = data.is_a?(Hash) ? data.dig("data", "user") : nil
    return { taken_at: nil, shortcode: nil } unless user.is_a?(Hash)

    # Converts an epoch value to a UTC Time; blank or unusable values become nil.
    to_utc = lambda do |epoch|
      begin
        epoch.present? ? Time.at(epoch.to_i).utc : nil
      rescue StandardError
        nil
      end
    end

    node =
      user.dig("edge_owner_to_timeline_media", "edges")&.first&.dig("node") ||
      user.dig("edge_felix_video_timeline", "edges")&.first&.dig("node")
    if node.is_a?(Hash)
      epoch = node["taken_at_timestamp"] || node["taken_at"] || node["taken_at_time"]
      return { taken_at: to_utc.call(epoch), shortcode: node["shortcode"].to_s.strip.presence }
    end

    # Fallback: fetch the user's feed items (this endpoint still works on builds where timeline edges are empty).
    user_id = user["id"].to_s.strip
    return { taken_at: nil, shortcode: nil } if user_id.blank?

    feed = fetch_user_feed(user_id: user_id, referer_username: username, count: 6)
    item = feed.is_a?(Hash) ? Array(feed["items"]).first : nil
    return { taken_at: nil, shortcode: nil } unless item.is_a?(Hash)

    {
      taken_at: to_utc.call(item["taken_at"]),
      shortcode: (item["code"] || item["shortcode"]).to_s.strip.presence
    }
  rescue StandardError
    { taken_at: nil, shortcode: nil }
  end
  # Fetches the web_profile_info JSON for +username+ using the account's stored cookies.
  #
  # Unofficial endpoint used by the Instagram web app; requires authenticated cookies.
  #
  # username - profile username; it is URL-escaped before being placed in the query string.
  #
  # Returns the parsed JSON body, or nil on a non-2xx response or any StandardError
  # (network failure, invalid URI, unparsable body, ...).
  def fetch_web_profile_info(username)
    # Escape the username so characters such as "&", "#" or spaces cannot alter the query.
    # For already-normalized usernames this is a no-op.
    uri = URI.parse("#{INSTAGRAM_BASE_URL}/api/v1/users/web_profile_info/?username=#{CGI.escape(username.to_s)}")

    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = (uri.scheme == "https")
    http.open_timeout = 10
    http.read_timeout = 20

    req = Net::HTTP::Get.new(uri.request_uri)
    req["User-Agent"] = @account.user_agent.presence || "Mozilla/5.0"
    req["Accept"] = "application/json, text/plain, */*"
    req["X-Requested-With"] = "XMLHttpRequest"
    # Falls back to a hard-coded app id when the auth snapshot does not carry one.
    req["X-IG-App-ID"] = (@account.auth_snapshot.dig("ig_app_id").presence || "936619743392459")
    req["Referer"] = "#{INSTAGRAM_BASE_URL}/#{username}/"
    csrf = @account.cookies.find { |c| c["name"].to_s == "csrftoken" }&.dig("value").to_s
    req["X-CSRFToken"] = csrf if csrf.present?
    req["Cookie"] = cookie_header_for(@account.cookies)

    res = http.request(req)
    return nil unless res.is_a?(Net::HTTPSuccess)

    JSON.parse(res.body.to_s)
  rescue StandardError
    nil
  end
  # Fetches one page of a user's feed from /api/v1/feed/user/<user_id>/ using the account's
  # stored cookies.
  #
  # user_id          - id placed in the request path.
  # referer_username - username used to build the Referer header.
  # count            - page size; clamped to 1..30.
  # max_id           - optional pagination cursor.
  #
  # Returns the parsed JSON body, or nil on a non-2xx response or any StandardError.
  def fetch_user_feed(user_id:, referer_username:, count:, max_id: nil)
    page_size = count.to_i.clamp(1, 30)
    params = [ "count=#{page_size}" ]
    params.push("max_id=#{CGI.escape(max_id.to_s)}") if max_id.present?
    endpoint = URI.parse("#{INSTAGRAM_BASE_URL}/api/v1/feed/user/#{user_id}/?#{params.join('&')}")

    client = Net::HTTP.new(endpoint.host, endpoint.port)
    client.use_ssl = (endpoint.scheme == "https")
    client.open_timeout = 10
    client.read_timeout = 20

    request = Net::HTTP::Get.new(endpoint.request_uri)
    {
      "User-Agent" => @account.user_agent.presence || "Mozilla/5.0",
      "Accept" => "application/json, text/plain, */*",
      "X-Requested-With" => "XMLHttpRequest",
      "X-IG-App-ID" => (@account.auth_snapshot.dig("ig_app_id").presence || "936619743392459"),
      "Referer" => "#{INSTAGRAM_BASE_URL}/#{referer_username}/"
    }.each { |header, value| request[header] = value }

    # The CSRF header is only sent when the cookie jar actually has a token.
    csrf_cookie = @account.cookies.find { |cookie| cookie["name"].to_s == "csrftoken" }
    csrf_token = csrf_cookie&.dig("value").to_s
    request["X-CSRFToken"] = csrf_token if csrf_token.present?
    request["Cookie"] = cookie_header_for(@account.cookies)

    response = client.request(request)
    response.is_a?(Net::HTTPSuccess) ? JSON.parse(response.body.to_s) : nil
  rescue StandardError
    nil
  end
  # Builds the profile-details Hash for +username+ purely from HTTP endpoints.
  #
  # Returns nil when the username is blank, when fetch_web_profile_info yields no user Hash,
  # or when any StandardError occurs. Otherwise returns a Hash with :username,
  # :display_name, :profile_pic_url, :ig_user_id, :bio, :followers_count, :category_name,
  # :is_business_account, :last_post_at and :latest_post_shortcode.
  def fetch_profile_details_via_api(username)
    handle = normalize_username(username)
    return nil if handle.blank?

    payload = fetch_web_profile_info(handle)
    profile = payload.is_a?(Hash) ? payload.dig("data", "user") : nil
    return nil unless profile.is_a?(Hash)

    newest = extract_latest_post_from_profile_http(handle)

    # Stripped text of a field, or nil when blank.
    text = ->(key) { profile[key].to_s.strip.presence }
    # Picture URLs come HTML-escaped; prefer the HD variant.
    picture = ->(key) { CGI.unescapeHTML(profile[key].to_s).strip.presence }

    {
      username: handle,
      display_name: text.call("full_name"),
      profile_pic_url: picture.call("profile_pic_url_hd") || picture.call("profile_pic_url"),
      ig_user_id: text.call("id"),
      bio: profile["biography"].to_s.presence,
      followers_count: normalize_count(profile["follower_count"]),
      category_name: text.call("category_name"),
      is_business_account: ActiveModel::Type::Boolean.new.cast(profile["is_business_account"]),
      last_post_at: newest[:taken_at],
      latest_post_shortcode: newest[:shortcode]
    }
  rescue StandardError
    nil
  end
  # Pages through the friendships API for +profile_username+.
  #
  # profile_username - username whose list is fetched.
  # list_kind        - :followers selects the followers endpoint; any other value selects
  #                    following.
  #
  # At most 25 pages of 200 entries are requested. Returns a Hash of
  # username => { display_name:, profile_pic_url: }; an empty Hash when the username or user
  # id is blank, or when any StandardError occurs (entries collected so far are discarded).
  def fetch_follow_list_via_api(profile_username:, list_kind:)
    handle = normalize_username(profile_username)
    return {} if handle.blank?

    info = fetch_web_profile_info(handle)
    owner = info.is_a?(Hash) ? info.dig("data", "user") : nil
    owner_id = owner.is_a?(Hash) ? owner["id"].to_s.strip : ""
    return {} if owner_id.blank?

    endpoint = (list_kind.to_sym == :followers) ? "followers" : "following"
    referer = "#{INSTAGRAM_BASE_URL}/#{handle}/"
    collected = {}
    cursor = nil

    # Hard page cap so a misbehaving cursor cannot loop forever.
    25.times do
      params = [ "count=200" ]
      params << "max_id=#{CGI.escape(cursor)}" if cursor.present?
      page = ig_api_get_json(path: "/api/v1/friendships/#{owner_id}/#{endpoint}/?#{params.join('&')}", referer: referer)
      break unless page.is_a?(Hash)

      Array(page["users"]).grep(Hash).each do |entry|
        name = normalize_username(entry["username"])
        next if name.blank?

        # First occurrence wins.
        collected[name] ||= {
          display_name: entry["full_name"].to_s.strip.presence || name,
          profile_pic_url: CGI.unescapeHTML(entry["profile_pic_url"].to_s).strip.presence
        }
      end

      cursor = page["next_max_id"].to_s.strip.presence
      break if cursor.blank?
    end

    collected
  rescue StandardError
    {}
  end
  # Lists followers of +profile_username+ that the current account also follows.
  #
  # profile_username - username whose followers are scanned.
  # limit            - maximum number of results; clamped to 1..100.
  #
  # Whether the account follows an entry is taken from the entry's friendship_status when it
  # has a "following" value; otherwise from the account's stored profiles flagged
  # following: true. At most 25 pages are requested.
  #
  # Returns an Array of { username:, display_name:, profile_pic_url: }; an empty Array when
  # the username or user id is blank, or when any StandardError occurs.
  def fetch_mutual_friends_via_api(profile_username:, limit:)
    handle = normalize_username(profile_username)
    return [] if handle.blank?

    info = fetch_web_profile_info(handle)
    owner = info.is_a?(Hash) ? info.dig("data", "user") : nil
    owner_id = owner.is_a?(Hash) ? owner["id"].to_s.strip : ""
    return [] if owner_id.blank?

    cap = limit.to_i.clamp(1, 100)
    referer = "#{INSTAGRAM_BASE_URL}/#{handle}/"
    found = []
    accepted = Set.new
    followed_by_viewer = nil # loaded lazily, only if some entry lacks a friendship status
    cursor = nil

    # Hard page cap so a misbehaving cursor cannot loop forever.
    25.times do
      break if found.length >= cap

      params = [ "count=200", "search_surface=follow_list_page", "query=", "enable_groups=true" ]
      params << "max_id=#{CGI.escape(cursor)}" if cursor.present?
      page = ig_api_get_json(path: "/api/v1/friendships/#{owner_id}/followers/?#{params.join('&')}", referer: referer)
      break unless page.is_a?(Hash)

      entries = Array(page["users"]).grep(Hash)
      break if entries.empty?

      entries.each do |entry|
        name = normalize_username(entry["username"])
        next if name.blank? || accepted.include?(name)

        status = entry["friendship_status"].is_a?(Hash) ? entry["friendship_status"] : {}
        from_status = status.key?("following") ? ActiveModel::Type::Boolean.new.cast(status["following"]) : nil

        follows =
          if from_status.nil?
            followed_by_viewer ||= @account.instagram_profiles.where(following: true).pluck(:username).map { |u| normalize_username(u) }.to_set
            followed_by_viewer.include?(name)
          else
            from_status
          end
        next unless follows

        accepted << name
        found << {
          username: name,
          display_name: entry["full_name"].to_s.strip.presence || name,
          profile_pic_url: CGI.unescapeHTML(entry["profile_pic_url"].to_s).strip.presence
        }
        break if found.length >= cap
      end

      cursor = page["next_max_id"].to_s.strip.presence
      break if cursor.blank?
    end

    found
  rescue StandardError
    []
  end
  # Collects the participants of direct-message threads from the inbox API.
  #
  # limit - number of threads to walk, clamped to 1..400 (default 120).
  #
  # Requests pages of up to 50 threads, for at most 12 requests, following the
  # inbox's "oldest_cursor". Returns a Hash keyed by normalized username with
  # { display_name:, profile_pic_url: } values (first occurrence wins), or {}
  # on any error.
  def fetch_conversation_users_via_api(limit: 120)
    found = {}
    page_cursor = nil
    budget = limit.to_i.clamp(1, 400)

    12.times do
      break unless budget.positive?

      page_size = budget < 50 ? budget : 50
      params = [ "limit=#{page_size}", "visual_message_return_type=unseen" ]
      params << "cursor=#{CGI.escape(page_cursor)}" if page_cursor.present?
      payload = ig_api_get_json(
        path: "/api/v1/direct_v2/inbox/?#{params.join('&')}",
        referer: "#{INSTAGRAM_BASE_URL}/direct/inbox/"
      )
      break unless payload.is_a?(Hash)

      inbox = payload["inbox"].is_a?(Hash) ? payload["inbox"] : {}
      threads = Array(inbox["threads"])
      break if threads.empty?

      participants = threads.grep(Hash).flat_map { |thread| Array(thread["thread_users"]) }
      participants.grep(Hash).each do |participant|
        name = normalize_username(participant["username"])
        next if name.blank?

        found[name] ||= {
          display_name: participant["full_name"].to_s.strip.presence || name,
          profile_pic_url: CGI.unescapeHTML(participant["profile_pic_url"].to_s).strip.presence
        }
      end

      # The budget counts threads, not users.
      budget -= threads.length
      page_cursor = inbox["oldest_cursor"].to_s.strip.presence
      break if page_cursor.blank?
    end

    found
  rescue StandardError
    {}
  end
  # Reads the reels tray and returns the users that appear in it.
  #
  # The tray may arrive as an Array or as a Hash wrapping an "items" list; any
  # other shape yields no entries. Each tray entry contributes its nested
  # "user" hash when present, otherwise the entry itself is treated as the user.
  #
  # Returns a Hash keyed by normalized username with
  # { display_name:, profile_pic_url: } values, or {} on any error.
  def fetch_story_users_via_api
    payload = ig_api_get_json(path: "/api/v1/feed/reels_tray/", referer: INSTAGRAM_BASE_URL)
    return {} unless payload.is_a?(Hash)

    tray = payload["tray"]
    entries =
      case tray
      when Array then tray
      when Hash then Array(tray["items"])
      else []
      end

    entries.each_with_object({}) do |entry, found|
      next unless entry.is_a?(Hash)

      owner = entry["user"].is_a?(Hash) ? entry["user"] : entry
      name = normalize_username(owner["username"])
      next if name.blank?

      found[name] ||= {
        display_name: owner["full_name"].to_s.strip.presence || name,
        profile_pic_url: CGI.unescapeHTML(owner["profile_pic_url"].to_s).strip.presence
      }
    end
  rescue StandardError
    {}
  end
  # Fetches the home timeline and converts each media entry through
  # extract_home_feed_item_from_api.
  #
  # limit - number of items requested and returned, clamped to 1..60.
  #
  # Returns an Array of extracted items (entries that fail extraction are
  # dropped), or [] on any error.
  def fetch_home_feed_items_via_api(limit: 50)
    max_items = limit.to_i.clamp(1, 60)
    payload = ig_api_get_json(path: "/api/v1/feed/timeline/?count=#{max_items}", referer: INSTAGRAM_BASE_URL)
    return [] unless payload.is_a?(Hash)

    # Newer payloads often use feed_items with nested media_or_ad; fall back to
    # the flat "items" list only when feed_items is empty.
    wrapped = Array(payload["feed_items"])
    media_nodes =
      if wrapped.empty?
        Array(payload["items"])
      else
        wrapped.filter_map { |wrapper| (wrapper["media_or_ad"] || wrapper["media"]) if wrapper.is_a?(Hash) }
      end

    media_nodes.filter_map { |node| extract_home_feed_item_from_api(node) }.first(max_items)
  rescue StandardError
    []
  end
  # Converts one raw timeline media hash into the feed-item structure.
  #
  # item - media hash from the timeline payload.
  #
  # For carousels (media_type 8) the image comes from the first video slide,
  # else the first image slide, else the first slide; the video comes from the
  # first video slide, else the first slide. For everything else both come
  # from the item itself.
  #
  # Returns a Hash with :shortcode, :post_kind ("reel" when product_type
  # contains "clips", otherwise "post"), :author_username, :media_url (video
  # preferred over image), :caption and :metadata; or nil when the item is not
  # a Hash, has no shortcode, or any error occurs.
  def extract_home_feed_item_from_api(item)
    return nil unless item.is_a?(Hash)

    shortcode = (item["code"] || item["shortcode"]).to_s.strip
    return nil if shortcode.blank?

    media_type = item["media_type"].to_i
    product_type = item["product_type"].to_s.downcase
    post_kind = product_type.include?("clips") ? "reel" : "post"

    if media_type == 8
      # Filter and search the carousel once and reuse it for both lookups.
      slides = Array(item["carousel_media"]).select { |m| m.is_a?(Hash) }
      video_slide = slides.find { |m| m["media_type"].to_i == 2 }
      image_source = video_slide || slides.find { |m| m["media_type"].to_i == 1 } || slides.first
      video_source = video_slide || slides.first
    else
      image_source = item
      video_source = item
    end

    image_candidate = image_source&.dig("image_versions2", "candidates", 0)
    video_candidate = Array(video_source&.dig("video_versions")).first

    image_url = CGI.unescapeHTML(image_candidate&.dig("url").to_s).strip.presence
    video_url = CGI.unescapeHTML(video_candidate&.dig("url").to_s).strip.presence

    {
      shortcode: shortcode,
      post_kind: post_kind,
      author_username: normalize_username(item.dig("user", "username").to_s),
      media_url: (video_url.presence || image_url).to_s,
      caption: item.dig("caption", "text").to_s.presence,
      metadata: {
        source: "api_timeline",
        media_id: (item["pk"] || item["id"]).to_s.presence,
        media_type: media_type,
        media_url_image: image_url.to_s.presence,
        media_url_video: video_url.to_s.presence,
        product_type: product_type,
        natural_width: image_candidate&.dig("width"),
        natural_height: image_candidate&.dig("height")
      }
    }
  rescue StandardError
    nil
  end
  # Performs an authenticated GET using the account's cookies and returns the
  # parsed JSON body.
  #
  # path    - absolute URL (anything starting with "http") or a path that is
  #           appended to INSTAGRAM_BASE_URL.
  # referer - value sent as the Referer header.
  #
  # Returns the parsed JSON, or nil when the response is not a success, its
  # content type does not mention "json", or any error occurs.
  def ig_api_get_json(path:, referer:)
    target = path.to_s
    target = "#{INSTAGRAM_BASE_URL}#{path}" unless target.start_with?("http")
    uri = URI.parse(target)

    client = Net::HTTP.new(uri.host, uri.port)
    client.use_ssl = uri.scheme == "https"
    client.open_timeout = 10
    client.read_timeout = 20

    request = Net::HTTP::Get.new(uri.request_uri)
    {
      "User-Agent" => @account.user_agent.presence || "Mozilla/5.0",
      "Accept" => "application/json, text/plain, */*",
      "X-Requested-With" => "XMLHttpRequest",
      "X-IG-App-ID" => @account.auth_snapshot.dig("ig_app_id").presence || "936619743392459",
      "Referer" => referer.to_s
    }.each { |header, value| request[header] = value }

    # The CSRF header is only sent when the cookie jar holds a csrftoken.
    csrf_cookie = @account.cookies.find { |cookie| cookie["name"].to_s == "csrftoken" }
    csrf_token = csrf_cookie&.dig("value").to_s
    request["X-CSRFToken"] = csrf_token if csrf_token.present?
    request["Cookie"] = cookie_header_for(@account.cookies)

    response = client.request(request)
    return nil unless response.is_a?(Net::HTTPSuccess) && response["content-type"].to_s.include?("json")

    JSON.parse(response.body.to_s)
  rescue StandardError
    nil
  end
  # Requests /api/v1/feed/reels_media/ for +user_id+ and picks the reel that
  # belongs to that user out of the response.
  #
  # Lookup order when the payload has a "reels" Hash: the entry keyed by the
  # user id, then the first entry whose owner id matches, then (with a warning)
  # the only entry if there is exactly one; otherwise a warning is logged and
  # nil is returned. A "reels_media" Array is handled the same way minus the
  # keyed lookup. If neither structure is present the whole payload is returned.
  #
  # Every parsed response is also handed to debug_story_reel_data.
  #
  # Returns the selected reel, the raw payload, or nil (also on non-success
  # responses and on any error).
  def fetch_story_reel(user_id:, referer_username:)
    wanted_id = user_id.to_s
    endpoint = URI.parse("#{INSTAGRAM_BASE_URL}/api/v1/feed/reels_media/?reel_ids=#{CGI.escape(wanted_id)}")

    client = Net::HTTP.new(endpoint.host, endpoint.port)
    client.use_ssl = endpoint.scheme == "https"
    client.open_timeout = 10
    client.read_timeout = 20

    request = Net::HTTP::Get.new(endpoint.request_uri)
    request["User-Agent"] = @account.user_agent.presence || "Mozilla/5.0"
    request["Accept"] = "application/json, text/plain, */*"
    request["X-Requested-With"] = "XMLHttpRequest"
    request["X-IG-App-ID"] = (@account.auth_snapshot.dig("ig_app_id").presence || "936619743392459")
    request["Referer"] = "#{INSTAGRAM_BASE_URL}/#{referer_username}/"
    csrf_token = @account.cookies.find { |cookie| cookie["name"].to_s == "csrftoken" }&.dig("value").to_s
    request["X-CSRFToken"] = csrf_token if csrf_token.present?
    request["Cookie"] = cookie_header_for(@account.cookies)

    response = client.request(request)
    return nil unless response.is_a?(Net::HTTPSuccess)

    payload = JSON.parse(response.body.to_s)

    # Debug: capture raw story reel data
    debug_story_reel_data(referer_username: referer_username, user_id: user_id, body: payload)

    # Every warning shares the same two leading payload keys.
    warn_with = lambda do |event, extra|
      Ops::StructuredLogger.warn(
        event: event,
        payload: { requested_user_id: wanted_id, referer_username: referer_username.to_s }.merge(extra)
      )
    end

    keyed_reels = payload["reels"]
    if keyed_reels.is_a?(Hash)
      match = keyed_reels[wanted_id]
      return match if match.is_a?(Hash)

      match = keyed_reels.values.find { |entry| reel_entry_owner_id(entry) == wanted_id }
      return match if match.is_a?(Hash)

      if keyed_reels.size == 1
        warn_with.call(
          "instagram.story_reel.single_reel_without_key_match",
          { available_reel_keys: keyed_reels.keys.first(6) }
        )
        return keyed_reels.values.first
      end

      warn_with.call(
        "instagram.story_reel.requested_reel_missing",
        { available_reel_keys: keyed_reels.keys.first(10), reels_count: keyed_reels.size }
      )
      return nil
    end

    listed_reels = payload["reels_media"]
    if listed_reels.is_a?(Array)
      match = listed_reels.find { |entry| reel_entry_owner_id(entry) == wanted_id }
      return match if match.is_a?(Hash)

      if listed_reels.length == 1
        warn_with.call("instagram.story_reel.single_reel_media_without_owner_match", {})
        return listed_reels.first
      end

      warn_with.call(
        "instagram.story_reel.reels_media_owner_missing",
        { reels_media_count: listed_reels.length }
      )
      return nil
    end

    payload
  rescue StandardError
    nil
  end
  # API-only story media resolution:
  # 1) /api/v1/users/web_profile_info?username=...
  # 2) /api/v1/feed/reels_media/?reel_ids=<user_id>
  # Never falls back to DOM media scraping.
  #
  # driver:             accepted for interface compatibility; not used here.
  # username:           story owner; normalized before lookup.
  # story_id:           story to resolve; "unknown" (any case) is treated as blank.
  # fallback_story_key: reported as :story_id when story_id is blank.
  # cache:              optional Hash passed through to resolve_story_item_via_api.
  #
  # Always returns a Hash with the same keys. When a story with a media URL is
  # found, :source is "api_reels_media". Otherwise :url is nil and :source is
  # "api_unresolved" (nothing matched; a warning is logged) or
  # "api_unresolved_error" (an exception was raised).
  def resolve_story_media_for_current_context(driver:, username:, story_id:, fallback_story_key:, cache: nil)
    uname = normalize_username(username)
    sid = story_id.to_s.strip
    # casecmp returns nil for incompatible encodings, so compare to 0 rather
    # than calling .zero? on the result.
    sid = "" if sid.casecmp("unknown") == 0

    api_story = resolve_story_item_via_api(username: uname, story_id: sid, cache: cache)
    url = api_story.is_a?(Hash) ? api_story[:media_url].to_s : ""
    if url.present?
      return {
        media_type: api_story[:media_type].to_s.presence || "unknown",
        url: url,
        width: api_story[:width],
        height: api_story[:height],
        source: "api_reels_media",
        story_id: api_story[:story_id].to_s,
        image_url: api_story[:image_url].to_s.presence,
        video_url: api_story[:video_url].to_s.presence,
        owner_user_id: api_story[:owner_user_id].to_s.presence,
        owner_username: api_story[:owner_username].to_s.presence,
        media_variant_count: Array(api_story[:media_variants]).length,
        primary_media_index: api_story[:primary_media_index],
        primary_media_source: api_story[:primary_media_source].to_s.presence,
        carousel_media: Array(api_story[:carousel_media])
      }
    end

    reported_story_id = sid.presence || fallback_story_key.to_s
    Ops::StructuredLogger.warn(
      event: "instagram.story_media.api_unresolved",
      payload: {
        username: uname,
        story_id: reported_story_id,
        source: "api_only_resolution"
      }
    )
    unresolved_story_media_result(source: "api_unresolved", story_id: reported_story_id)
  rescue StandardError
    # sid is nil here if the failure happened before it was assigned.
    unresolved_story_media_result(source: "api_unresolved_error", story_id: sid.presence || fallback_story_key.to_s)
  end

  # Builds the placeholder result used when no story media could be resolved.
  # Carries the same keys as a successful result, with only :source and
  # :story_id filled in.
  def unresolved_story_media_result(source:, story_id:)
    {
      media_type: nil,
      url: nil,
      width: nil,
      height: nil,
      source: source,
      story_id: story_id,
      image_url: nil,
      video_url: nil,
      owner_user_id: nil,
      owner_username: nil,
      media_variant_count: 0,
      primary_media_index: nil,
      primary_media_source: nil,
      carousel_media: []
    }
  end
  # Finds one story of +username+ among the items returned by
  # fetch_story_items_via_api.
  #
  # With a non-blank story_id, returns the item whose :story_id matches, or nil
  # if none does. With a blank story_id, returns the sole item only when
  # exactly one story exists (otherwise the choice would be ambiguous).
  #
  # Returns the story Hash or nil (also on any error).
  def resolve_story_item_via_api(username:, story_id:, cache: nil)
    handle = normalize_username(username)
    return nil if handle.blank?

    stories = fetch_story_items_via_api(username: handle, cache: cache)
    return nil unless stories.is_a?(Array) && !stories.empty?

    wanted = story_id.to_s.strip
    if wanted.blank?
      # Only pick the first item without a story_id when unambiguous.
      return stories.length == 1 ? stories.first : nil
    end

    stories.find { |story| story.is_a?(Hash) && story[:story_id].to_s == wanted }
  rescue StandardError
    nil
  end
  # Loads the current stories of +username+ as extracted story hashes.
  #
  # Resolves the user id through fetch_web_profile_info, fetches the reel via
  # fetch_story_reel and maps each raw item through extract_story_item.
  #
  # cache - optional Hash; results are stored under "stories:<username>" as
  #         { user_id:, items:, fetched_at: } and a cached :items Array is
  #         returned without any request.
  #
  # Returns an Array of story hashes, or [] when the username or user id is
  # blank or any error occurs.
  def fetch_story_items_via_api(username:, cache: nil)
    handle = normalize_username(username)
    return [] if handle.blank?

    memo = cache.is_a?(Hash) ? cache : nil
    memo_key = "stories:#{handle}"
    hit = memo && memo[memo_key]
    return hit[:items] if hit.is_a?(Hash) && hit[:items].is_a?(Array)

    profile_payload = fetch_web_profile_info(handle)
    profile_user = profile_payload.dig("data", "user") if profile_payload.is_a?(Hash)
    owner_id = profile_user.is_a?(Hash) ? profile_user["id"].to_s.strip : ""
    return [] if owner_id.blank?

    reel = fetch_story_reel(user_id: owner_id, referer_username: handle)
    raw_items = reel.is_a?(Hash) ? Array(reel["items"]) : []
    parsed = raw_items.filter_map { |raw| extract_story_item(raw, username: handle, reel_owner_id: owner_id) }

    memo[memo_key] = { user_id: owner_id, items: parsed, fetched_at: Time.current.utc.iso8601(3) } if memo
    parsed
  rescue StandardError
    []
  end
  # Converts one raw reel item into the story structure used by the callers.
  #
  # item          - raw story hash from the reel payload.
  # username      - reel owner's username; used for the permalink and for
  #                 external-attribution detection.
  # reel_owner_id - id of the reel owner; falls back to the item's own
  #                 owner/user id when blank.
  #
  # The story id is the part of "pk"/"id" before the first underscore. Media
  # fields come from the primary variant picked by
  # choose_primary_story_media_variant.
  #
  # Returns the story Hash, or nil when the item is not a Hash, has no story
  # id, or any error occurs.
  def extract_story_item(item, username:, reel_owner_id: nil)
    return nil unless item.is_a?(Hash)

    story_id = (item["pk"] || item["id"]).to_s.split("_").first.to_s.strip
    return nil if story_id.blank?

    variants = extract_story_media_variants_from_item(item)
    primary = choose_primary_story_media_variant(variants: variants)

    media_type = primary[:media_type].to_s.presence || story_media_type(item["media_type"])
    image_url = primary[:image_url].to_s.presence
    video_url = primary[:video_url].to_s.presence
    media_url = primary[:media_url].to_s.presence || video_url || image_url

    owner_id = (
      item.dig("owner", "id") ||
      item.dig("owner", "pk") ||
      item.dig("user", "id") ||
      item.dig("user", "pk")
    ).to_s.strip
    owner_username = normalize_username(item.dig("user", "username").to_s)

    attribution = detect_external_story_attribution_from_item(
      item: item,
      reel_owner_id: reel_owner_id.to_s.presence || owner_id,
      reel_username: username
    )

    cast_flag = ->(key) { item.key?(key) ? ActiveModel::Type::Boolean.new.cast(item[key]) : nil }
    positive_or_nil = lambda do |value|
      number = value.to_i
      number.positive? ? number : nil
    end

    {
      story_id: story_id,
      media_type: media_type,
      media_url: media_url,
      image_url: image_url,
      video_url: video_url,
      can_reply: cast_flag.call("can_reply"),
      can_reshare: cast_flag.call("can_reshare"),
      owner_user_id: owner_id.presence,
      owner_username: owner_username.presence,
      api_has_external_profile_indicator: attribution[:has_external_profile_indicator],
      api_external_profile_reason: attribution[:reason_code],
      api_external_profile_targets: attribution[:targets],
      api_should_skip: attribution[:has_external_profile_indicator],
      api_raw_media_type: item["media_type"].to_i,
      primary_media_source: primary[:source].to_s.presence,
      primary_media_index: primary[:index],
      media_variants: variants,
      carousel_media: variants.select { |variant| variant[:source].to_s == "carousel_media" },
      width: positive_or_nil.call(primary[:width]),
      height: positive_or_nil.call(primary[:height]),
      caption: item.dig("caption", "text").to_s.presence,
      taken_at: parse_unix_time(item["taken_at"] || item["taken_at_timestamp"]),
      expiring_at: parse_unix_time(item["expiring_at"] || item["expiring_at_timestamp"]),
      permalink: "#{INSTAGRAM_BASE_URL}/stories/#{username}/#{story_id}/"
    }
  rescue StandardError
    nil
  end
  # Lists every media variant of a story item: the item itself (source "root",
  # index 0) followed by each "carousel_media" entry (source "carousel_media",
  # index starting at 1).
  #
  # Returns only variants that have a media URL; [] when the item is not a
  # Hash or any error occurs.
  def extract_story_media_variants_from_item(item)
    return [] unless item.is_a?(Hash)

    root_variant = build_story_media_variant(item: item, source: "root", index: 0)
    slide_variants = Array(item["carousel_media"]).each_with_index.map do |slide, position|
      build_story_media_variant(item: slide, source: "carousel_media", index: position + 1)
    end

    [ root_variant, *slide_variants ].compact.select { |variant| variant[:media_url].to_s.present? }
  rescue StandardError
    []
  end
  # Describes one media entry (a story root or a carousel slide).
  #
  # item   - raw media hash.
  # source - label stored in :source.
  # index  - position stored in :index.
  #
  # :media_url prefers the video URL for videos and the image URL otherwise,
  # falling back to whichever URL exists. Width and height come from
  # original_width/original_height, then the first image candidate, then the
  # first video version, and are nil unless positive.
  #
  # Returns the variant Hash, or nil when the item is not a Hash or any error
  # occurs.
  def build_story_media_variant(item:, source:, index:)
    return nil unless item.is_a?(Hash)

    kind = story_media_type(item["media_type"])
    still = item.dig("image_versions2", "candidates", 0)
    clip = Array(item["video_versions"]).first

    still_url = CGI.unescapeHTML(still&.dig("url").to_s).strip.presence
    clip_url = CGI.unescapeHTML(clip&.dig("url").to_s).strip.presence
    preference = kind == "video" ? [ clip_url, still_url ] : [ still_url, clip_url ]

    raw_width = item["original_width"] || still&.dig("width") || clip&.dig("width")
    raw_height = item["original_height"] || still&.dig("height") || clip&.dig("height")
    pixel_width = raw_width.to_i
    pixel_height = raw_height.to_i

    {
      source: source.to_s,
      index: index.to_i,
      media_pk: (item["pk"] || item["id"]).to_s.split("_").first.to_s.presence,
      raw_media_type: item["media_type"].to_i,
      media_type: kind,
      media_url: preference.compact.first,
      image_url: still_url,
      video_url: clip_url,
      width: pixel_width.positive? ? pixel_width : nil,
      height: pixel_height.positive? ? pixel_height : nil
    }
  rescue StandardError
    nil
  end
  # Picks the variant that represents a story.
  #
  # Only Hash variants with a media URL are considered. Preference order: the
  # variant whose source is "root", then the first video, then the first
  # remaining variant.
  #
  # Returns the chosen variant Hash, or {} when nothing is usable or any error
  # occurs.
  def choose_primary_story_media_variant(variants:)
    usable = Array(variants).select { |variant| variant.is_a?(Hash) && variant[:media_url].to_s.present? }

    usable.find { |variant| variant[:source].to_s == "root" } ||
      usable.find { |variant| variant[:media_type].to_s == "video" } ||
      usable.first ||
      {}
  rescue StandardError
    {}
  end
  # Reduces media variants to a compact form suitable for metadata storage.
  #
  # variants - list of variant hashes; symbol keys are read first, then the
  #            equivalent string keys.
  # limit    - how many leading variants to consider, clamped to 1..20.
  #
  # Variants without a media URL are dropped and nil fields are removed from
  # each result. Returns an Array of Hashes, or [] on any error.
  def compact_story_media_variants_for_metadata(variants, limit: 8)
    cap = limit.to_i.clamp(1, 20)

    Array(variants).first(cap).filter_map do |entry|
      record = entry.is_a?(Hash) ? entry : {}
      read = ->(key) { record[key] || record[key.to_s] }

      url = read.call(:media_url)
      next if url.to_s.blank?

      {
        source: read.call(:source).to_s.presence,
        index: read.call(:index),
        media_pk: read.call(:media_pk).to_s.presence,
        media_type: read.call(:media_type).to_s.presence,
        media_url: url.to_s.presence,
        image_url: read.call(:image_url).to_s.presence,
        video_url: read.call(:video_url).to_s.presence,
        width: read.call(:width),
        height: read.call(:height)
      }.compact
    end
  rescue StandardError
    []
  end
  # Checks whether a story item points at a profile other than the reel owner.
  #
  # Signals, in the order they are recorded:
  #   owner_id_mismatch           - item owner id differs from reel_owner_id
  #   story_feed_media_external   - story_feed_media references another user
  #   media_attributions_external - media_attributions_data references another user
  #   reel_mentions_external      - reel_mentions references another user
  #   reshare_of_text_post        - is_reshare_of_text_post_app_media_in_ig is true
  #   owner_username_mismatch     - item username differs from reel_username
  #
  # Returns { has_external_profile_indicator:, reason_code:, targets: } where
  # reason_code is the first recorded signal and targets holds at most 12
  # unique non-blank ids/usernames. On a non-Hash item or any error the result
  # reports no indicator.
  def detect_external_story_attribution_from_item(item:, reel_owner_id:, reel_username:)
    return { has_external_profile_indicator: false, reason_code: nil, targets: [] } unless item.is_a?(Hash)

    reason_codes = []
    external_targets = []
    expected_username = normalize_username(reel_username)
    expected_id = reel_owner_id.to_s

    item_owner_id = (item.dig("owner", "id") || item.dig("owner", "pk")).to_s.strip
    if item_owner_id.present? && expected_id.present? && item_owner_id != expected_id
      reason_codes << "owner_id_mismatch"
      external_targets << item_owner_id
    end

    only_external = lambda do |candidates|
      candidates.select do |candidate|
        external_story_target?(candidate, reel_owner_id: reel_owner_id, reel_username: expected_username)
      end
    end

    feed_media = Array(item["story_feed_media"])
    # Each source is loaded lazily, in order, right before it is filtered.
    target_sources = [
      [ "story_feed_media_external", -> { feed_media.any? ? extract_story_feed_media_targets(feed_media) : [] } ],
      [ "media_attributions_external", -> { extract_media_attribution_targets(Array(item["media_attributions_data"])) } ],
      [ "reel_mentions_external", -> { extract_reel_mention_targets(Array(item["reel_mentions"])) } ]
    ]
    target_sources.each do |code, load_targets|
      hits = only_external.call(load_targets.call)
      next if hits.empty?

      reason_codes << code
      external_targets.concat(hits)
    end

    reason_codes << "reshare_of_text_post" if item["is_reshare_of_text_post_app_media_in_ig"] == true

    item_username = normalize_username(item.dig("user", "username").to_s)
    if item_username.present? && expected_username.present? && item_username != expected_username
      reason_codes << "owner_username_mismatch"
      external_targets << item_username
    end

    distinct_reasons = reason_codes.uniq
    {
      has_external_profile_indicator: distinct_reasons.any?,
      reason_code: distinct_reasons.first,
      targets: external_targets.map { |target| target.to_s.strip }.reject(&:blank?).uniq.first(12)
    }
  rescue StandardError
    { has_external_profile_indicator: false, reason_code: nil, targets: [] }
  end
  # Tells whether +target+ refers to someone other than the reel owner.
  #
  # An all-digit target is compared with reel_owner_id; anything else is
  # normalized and compared with reel_username. When the value needed for the
  # comparison is blank the answer is false, as it is on any error.
  def external_story_target?(target, reel_owner_id:, reel_username:)
    candidate = target.to_s.strip
    return false if candidate.blank?

    if candidate.match?(/\A\d+\z/)
      expected_id = reel_owner_id.to_s.strip
      return expected_id.present? && candidate != expected_id
    end

    expected_name = normalize_username(reel_username)
    candidate_name = normalize_username(candidate)
    expected_name.present? && candidate_name.present? && candidate_name != expected_name
  rescue StandardError
    false
  end
  # Extracts one owner reference per story_feed_media entry.
  #
  # Per Hash entry, the first available of: the media/user id or pk, the
  # normalized media/user username, or the second underscore-separated segment
  # of "media_compound_str". Entries that yield nothing are dropped.
  #
  # Returns an Array of non-blank strings, or [] on any error.
  def extract_story_feed_media_targets(story_feed_media)
    references = Array(story_feed_media).grep(Hash).map do |entry|
      owner_id = (
        entry.dig("media", "user", "id") ||
        entry.dig("media", "user", "pk") ||
        entry.dig("user", "id") ||
        entry.dig("user", "pk")
      ).to_s.strip
      next owner_id if owner_id.present?

      owner_name = normalize_username(
        entry.dig("media", "user", "username").to_s.presence ||
        entry.dig("user", "username").to_s
      )
      next owner_name if owner_name.present?

      compound = entry["media_compound_str"].to_s.strip
      compound.include?("_") ? compound.split("_")[1].to_s.strip : ""
    end

    references.reject(&:blank?)
  rescue StandardError
    []
  end
  # Extracts one user reference per reel mention: the user's id/pk (or the
  # mention's "user_id") when present, otherwise the normalized username.
  # Non-Hash mentions and mentions without either value are skipped.
  #
  # Returns an Array of references, or [] on any error.
  def extract_reel_mention_targets(reel_mentions)
    Array(reel_mentions).filter_map do |mention|
      next unless mention.is_a?(Hash)

      mentioned_id = (mention.dig("user", "id") || mention.dig("user", "pk") || mention["user_id"]).to_s.strip
      if mentioned_id.present?
        mentioned_id
      else
        normalize_username(mention.dig("user", "username").to_s.presence || mention["username"].to_s).presence
      end
    end
  rescue StandardError
    []
  end
  # Gathers every user id / username found anywhere inside the
  # media_attributions_data entries (via collect_candidate_user_targets).
  #
  # Returns an Array of unique, stripped, non-blank strings, or [] on any error.
  def extract_media_attribution_targets(media_attributions_data)
    gathered = Array(media_attributions_data).each_with_object([]) do |entry, found|
      collect_candidate_user_targets(entry, found)
    end

    gathered.map { |target| target.to_s.strip }.reject(&:blank?).uniq
  rescue StandardError
    []
  end
  # Recursively walks +node+ (nested Arrays and Hashes) and appends user
  # references to +targets+, which is mutated in place.
  #
  # From every Hash it takes the normalized values of the username-like keys
  # and the all-digit values of the id-like keys, then descends into Hash and
  # Array values. Anything that is neither a Hash nor an Array is ignored.
  def collect_candidate_user_targets(node, targets)
    case node
    when Array
      node.each { |child| collect_candidate_user_targets(child, targets) }
      nil
    when Hash
      %w[username owner_username mentioned_username].each do |key|
        name = normalize_username(node[key].to_s)
        targets << name if name.present?
      end

      %w[user_id owner_id mentioned_user_id pk id].each do |key|
        raw_id = node[key].to_s.strip
        targets << raw_id if raw_id.match?(/\A\d+\z/)
      end

      node.each_value do |child|
        collect_candidate_user_targets(child, targets) if child.is_a?(Hash) || child.is_a?(Array)
      end
    end
  end
  # Maps a raw media_type value to a label: 2 (after to_i) is "video",
  # everything else is "image".
  def story_media_type(value)
    value.to_i == 2 ? "video" : "image"
  end
  # Writes the raw story-reel response to tmp/story_reel_debug/ as pretty JSON
  # for later inspection. Best effort: any failure is logged and swallowed so
  # the calling request is never affected.
  #
  # referer_username - username the reel was requested for (used in the file name).
  # user_id          - requested user id (used in the file name).
  # body             - parsed response; stored verbatim under :raw_response.
  #
  # NOTE(review): this runs on every call regardless of environment, so the
  # debug directory grows without bound — confirm that is intended.
  def debug_story_reel_data(referer_username:, user_id:, body:)
    debug_dir = Rails.root.join("tmp", "story_reel_debug")
    FileUtils.mkdir_p(debug_dir) # no-op when the directory already exists

    # Both values come from remote data; keep only file-name-safe characters so
    # the file can never be written outside the debug directory.
    file_safe = ->(value) { value.to_s.gsub(/[^\w.\-]/, "_") }
    timestamp = Time.current.strftime("%Y%m%d_%H%M%S_%L")
    filename = "#{file_safe.call(referer_username)}_reel_#{file_safe.call(user_id)}_#{timestamp}.json"
    filepath = File.join(debug_dir, filename)

    # Tolerate unexpected payload shapes instead of aborting the capture.
    payload = body.is_a?(Hash) ? body : {}
    reels = payload["reels"]
    reels_media = payload["reels_media"]

    debug_data = {
      timestamp: Time.current.iso8601,
      referer_username: referer_username,
      user_id: user_id,
      raw_response: body,
      reels_count: reels.is_a?(Hash) ? reels.size : 0,
      reels_media_count: reels_media.is_a?(Array) || reels_media.is_a?(Hash) ? reels_media.size : 0,
      items_count: extract_items_count_from_body(payload)
    }

    File.write(filepath, JSON.pretty_generate(debug_data))
    Rails.logger.info "[STORY_REEL_DEBUG] Debug data saved: #{filepath}"
  rescue StandardError => e
    # Don't fail the entire request if debug capture fails.
    Rails.logger.error "[STORY_REEL_DEBUG] Failed to capture debug data: #{e.message}"
  end
  # Counts the story items contained in a reels response.
  #
  # Sums the sizes of the "items" Arrays of every Hash entry under "reels"
  # (when it is a Hash) and under "reels_media" (when it is an Array).
  #
  # Returns an Integer; 0 when body is not a Hash.
  def extract_items_count_from_body(body)
    return 0 unless body.is_a?(Hash)

    reel_entries = []
    reel_entries.concat(body["reels"].values) if body["reels"].is_a?(Hash)
    reel_entries.concat(body["reels_media"]) if body["reels_media"].is_a?(Array)

    reel_entries.sum do |reel|
      reel.is_a?(Hash) && reel["items"].is_a?(Array) ? reel["items"].size : 0
    end
  end
  # Returns the owner id of a reel entry as a stripped String.
  #
  # Looks, in order, at user.id, user.pk, owner.id, owner.pk, id and pk and
  # uses the first truthy value. Returns "" for non-Hash entries and on any
  # error (for example when "user" is not a nested structure).
  def reel_entry_owner_id(entry)
    return "" unless entry.is_a?(Hash)

    lookup_paths = [ %w[user id], %w[user pk], %w[owner id], %w[owner pk], %w[id], %w[pk] ]
    owner = nil
    lookup_paths.each do |path|
      owner = entry.dig(*path)
      break if owner
    end

    owner.to_s.strip
  rescue StandardError
    ""
  end
  # Converts one raw media hash into the post structure used for analysis.
  #
  # item             - raw media hash.
  # comments_limit   - maximum number of comments to attach.
  # referer_username - profile used as Referer when fetching comments.
  #
  # Supports media_type 1 (image), 2 (video) and 8 (carousel: first video slide
  # for the video, its image or else the first image slide for the image).
  # Comments come from fetch_media_comments, falling back to the item's
  # preview comments when that returns nothing.
  #
  # Returns the post Hash, or nil when the item is not a Hash, has an
  # unsupported media_type, has no media URL, or any error occurs.
  def extract_post_for_analysis(item, comments_limit:, referer_username:)
    return nil unless item.is_a?(Hash)

    media_type = item["media_type"].to_i
    product_type = item["product_type"].to_s.downcase
    post_kind = product_type.include?("clips") ? "reel" : "post"
    # NOTE(review): a positive reshare_count marks the post as a repost here;
    # confirm that field really describes this post rather than how often
    # others reshared it.
    is_repost =
      ActiveModel::Type::Boolean.new.cast(item["is_repost"]) ||
      item.dig("reshared_content", "pk").present? ||
      item["reshare_count"].to_i.positive?

    image_url = nil
    video_url = nil
    case media_type
    when 1
      image_url = item.dig("image_versions2", "candidates", 0, "url").to_s
    when 2
      video_url = Array(item["video_versions"]).first&.dig("url").to_s
      image_url = item.dig("image_versions2", "candidates", 0, "url").to_s
    when 8
      carousel = Array(item["carousel_media"]).select { |m| m.is_a?(Hash) }
      vid = carousel.find { |m| m["media_type"].to_i == 2 }
      img = carousel.find { |m| m["media_type"].to_i == 1 }
      video_url = Array(vid&.dig("video_versions")).first&.dig("url").to_s
      image_url = vid&.dig("image_versions2", "candidates", 0, "url").to_s.presence || img&.dig("image_versions2", "candidates", 0, "url").to_s
    else
      return nil
    end

    # video_url is still nil for plain image posts; CGI.unescapeHTML(nil)
    # raises, which previously made every image post return nil.
    image_url = CGI.unescapeHTML(image_url.to_s).strip
    video_url = CGI.unescapeHTML(video_url.to_s).strip
    media_url = video_url.presence || image_url.presence
    return nil if media_url.blank?

    media_pk = item["pk"].presence || item["id"].to_s.split("_").first
    comments = fetch_media_comments(media_id: media_pk, referer_username: referer_username, count: comments_limit)
    comments = extract_preview_comments(item, comments_limit: comments_limit) if comments.empty?

    shortcode = (item["code"] || item["shortcode"]).to_s.strip.presence
    permalink = shortcode.present? ? "#{INSTAGRAM_BASE_URL}/p/#{shortcode}/" : nil

    {
      shortcode: shortcode,
      media_id: media_pk.to_s.presence,
      post_kind: post_kind,
      product_type: product_type.presence,
      is_repost: is_repost,
      taken_at: parse_unix_time(item["taken_at"]),
      caption: item.dig("caption", "text").to_s.presence,
      media_url: media_url,
      image_url: image_url,
      video_url: video_url.presence,
      media_type: media_type,
      permalink: permalink,
      likes_count: item["like_count"].to_i,
      comments_count: item["comment_count"].to_i,
      comments: comments
    }
  rescue StandardError
    nil
  end
  # Builds comment hashes from the item's embedded "preview_comments".
  #
  # item           - raw media hash.
  # comments_limit - how many preview comments to take from the front.
  #
  # Returns an Array of { author_username:, text:, created_at: }. A non-Hash
  # entry still produces a row, with nil author and text.
  def extract_preview_comments(item, comments_limit:)
    Array(item["preview_comments"]).first(comments_limit).map do |comment|
      fields = comment.is_a?(Hash) ? comment : nil

      {
        author_username: fields && fields.dig("user", "username").to_s.strip,
        text: fields && fields["text"].to_s,
        created_at: parse_unix_time(fields && fields["created_at"])
      }
    end
  end
  # Fetches the comments of a media item through the comments API.
  #
  # media_id         - media primary key; blank yields [].
  # referer_username - profile used to build the Referer header.
  # count            - maximum number of comments, clamped to 1..50.
  #
  # The request goes through ig_api_get_json, which sends the same headers and
  # applies the same success and JSON content-type checks this method used to
  # duplicate. Malformed (non-Hash) comment entries are skipped instead of
  # discarding the whole list.
  #
  # Returns an Array of { author_username:, text:, created_at: } hashes, or []
  # when the request fails or any error occurs.
  def fetch_media_comments(media_id:, referer_username:, count:)
    return [] if media_id.to_s.blank?

    body = ig_api_get_json(
      path: "/api/v1/media/#{CGI.escape(media_id.to_s)}/comments/?can_support_threading=true&permalink_enabled=true",
      referer: "#{INSTAGRAM_BASE_URL}/#{referer_username}/"
    )
    return [] unless body.is_a?(Hash)

    Array(body["comments"]).grep(Hash).first(count.to_i.clamp(1, 50)).map do |comment|
      author = comment["user"]
      {
        author_username: (author.is_a?(Hash) ? author["username"] : nil).to_s.strip.presence,
        text: comment["text"].to_s,
        created_at: parse_unix_time(comment["created_at"])
      }
    end
  rescue StandardError
    []
  end
  # Fills in :comments for posts that report a positive :comments_count but
  # carry no comments, by querying the comments endpoint from inside an
  # authenticated browser session opened on the profile page.
  #
  # Posts are mutated in place. A failure on one post skips only that post;
  # any other error is swallowed and nil is returned.
  #
  # @param username [String] profile to navigate to before fetching
  # @param posts [Array<Hash>] post hashes with :media_id, :comments_count, :comments
  # @param comments_limit [Integer] forwarded as the per-post comment count
  # @return [Object, nil] nil when nothing needs enrichment or on error
  def enrich_missing_post_comments_via_browser!(username:, posts:, comments_limit:)
    pending = Array(posts).select do |entry|
      next false unless entry.is_a?(Hash)
      next false unless entry[:media_id].to_s.present?
      next false unless entry[:comments_count].to_i.positive?

      Array(entry[:comments]).empty?
    end
    return if pending.empty?

    with_recoverable_session(label: "profile_analysis_comments_fallback") do
      with_authenticated_driver do |driver|
        driver.navigate.to("#{INSTAGRAM_BASE_URL}/#{username}/")
        wait_for(driver, css: "body", timeout: 10)
        dismiss_common_overlays!(driver)

        pending.each do |entry|
          begin
            fetched = fetch_media_comments_from_browser_context(
              driver: driver,
              media_id: entry[:media_id],
              count: comments_limit
            )
            entry[:comments] = fetched unless fetched.empty?
          rescue StandardError
            next
          end
        end
      end
    end
  rescue StandardError
    nil
  end
  # Fetches comments for one media item by running fetch() inside the page
  # that the given driver currently has open, so the browser's own cookies
  # are sent (credentials: "include").
  #
  # The script reads arguments[1] into `limit` but does not use it for the
  # request; the limit is applied on the Ruby side. Any failure (non-Hash
  # payload, ok != true, non-JSON content type, parse or driver error)
  # yields an empty array.
  #
  # @param driver [#execute_async_script] browser driver positioned on the site
  # @param media_id [String, Integer] media primary key placed in the URL path
  # @param count [Integer] desired number of comments, clamped to 1..50
  # @return [Array<Hash>] hashes with :author_username, :text and :created_at
  def fetch_media_comments_from_browser_context(driver:, media_id:, count:)
    limit = count.to_i.clamp(1, 50)
    payload =
      driver.execute_async_script(
        <<~JS,
          const mediaId = arguments[0];
          const limit = arguments[1];
          const done = arguments[arguments.length - 1];
          fetch(`/api/v1/media/${mediaId}/comments/?can_support_threading=true&permalink_enabled=true`, {
          method: "GET",
          credentials: "include",
          headers: {
          "Accept": "application/json, text/plain, */*",
          "X-Requested-With": "XMLHttpRequest"
          }
          })
          .then(async (resp) => {
          const text = await resp.text();
          done({
          ok: resp.ok,
          status: resp.status,
          content_type: resp.headers.get("content-type") || "",
          body: text
          });
          })
          .catch((err) => {
          done({ ok: false, status: 0, content_type: "", body: "", error: String(err) });
          });
        JS
        media_id.to_s,
        limit
      )
    return [] unless payload.is_a?(Hash)
    return [] unless payload["ok"] == true
    return [] unless payload["content_type"].to_s.include?("json")

    body = JSON.parse(payload["body"].to_s)

    # Keep only Hash entries before mapping: a single malformed element would
    # otherwise raise inside the block and the rescue below would throw away
    # every valid comment along with it.
    Array(body["comments"]).grep(Hash).first(limit).map do |c|
      {
        author_username: c.dig("user", "username").to_s.strip.presence,
        text: c["text"].to_s,
        created_at: parse_unix_time(c["created_at"])
      }
    end
  rescue StandardError
    []
  end
  # Converts a Unix timestamp (seconds) into a UTC Time.
  #
  # Strings must look like a decimal number; any fractional part is dropped.
  # Non-numeric strings return nil instead of being coerced to 0 and reported
  # as 1970-01-01. Values that cannot be converted also return nil.
  #
  # @param value [Integer, Float, String, nil] seconds since the Unix epoch
  # @return [Time, nil] UTC time, or nil when value is missing or not numeric
  def parse_unix_time(value)
    seconds =
      case value
      when nil then nil
      when String
        text = value.strip
        text.to_i if text.match?(/\A[+-]?\d+(?:\.\d+)?\z/)
      else
        value.to_i if value.respond_to?(:to_i)
      end

    seconds && Time.at(seconds).utc
  rescue StandardError
    nil
  end
  # Serialises stored cookies into a Cookie request-header value.
  #
  # Entries that are not Hashes, or whose "name" or "value" is blank, are
  # skipped rather than raising.
  #
  # @param cookies [Array<Hash>, nil] cookie hashes with "name" and "value" keys
  # @return [String] "name=value" pairs joined by "; " ("" when none qualify)
  def cookie_header_for(cookies)
    pairs = Array(cookies).filter_map do |cookie|
      next unless cookie.is_a?(Hash)

      name = cookie["name"].to_s
      value = cookie["value"].to_s
      next if name.blank? || value.blank?

      "#{name}=#{value}"
    end

    pairs.join("; ")
  end
  # Checks via the API whether the account can DM +username+ by resolving the
  # user id and attempting to create a direct thread (cache bypassed).
  #
  # Never raises: any exception is reported as an "unknown" result carrying
  # the exception class name.
  #
  # @param username [String] profile username; normalised before use
  # @return [Hash] always has :can_message, :restriction_reason and :source;
  #   every result except the blank-username one also has :dm_state,
  #   :dm_reason and :dm_retry_after_at, and the "unavailable" result adds
  #   :api_status, :api_http_status and :api_error_code
  def verify_messageability_from_api(username:)
    handle = normalize_username(username)
    if handle.blank?
      return { can_message: nil, restriction_reason: "Username is blank", source: "api" }
    end

    resolved_id = story_user_id_for(username: handle)
    if resolved_id.blank?
      return {
        can_message: false,
        restriction_reason: "Unable to resolve user id via API",
        source: "api",
        dm_state: "unknown",
        dm_reason: "missing_user_id",
        dm_retry_after_at: Time.current + 6.hours
      }
    end

    outcome = create_direct_thread_for_user(user_id: resolved_id, use_cache: false)
    if outcome[:thread_id].to_s.present?
      return {
        can_message: true,
        restriction_reason: nil,
        source: "api",
        dm_state: "messageable",
        dm_reason: "thread_created",
        dm_retry_after_at: nil
      }
    end

    failure = outcome[:reason].to_s.presence || "missing_thread_id"
    # An HTTP 403 gets the longer, day-scale retry window; anything else retries in 12 hours.
    forbidden = outcome[:api_http_status].to_i == 403
    backoff = forbidden ? STORY_INTERACTION_RETRY_DAYS.days : 12.hours

    {
      can_message: false,
      restriction_reason: "DM unavailable via API (#{failure})",
      source: "api",
      dm_state: "unavailable",
      dm_reason: failure,
      dm_retry_after_at: Time.current + backoff,
      api_status: outcome[:api_status],
      api_http_status: outcome[:api_http_status],
      api_error_code: outcome[:api_error_code]
    }
  rescue StandardError => e
    {
      can_message: nil,
      restriction_reason: "Unable to verify messaging availability (api exception)",
      source: "api",
      dm_state: "unknown",
      dm_reason: "exception:#{e.class.name}",
      dm_retry_after_at: Time.current + 6.hours
    }
  end
  # Checks through the browser UI whether a DM can be composed to +username+:
  # opens the DM thread and waits for the composer textbox to be present.
  #
  # @param driver [Object] Selenium driver with an authenticated session
  # @param username [String] profile username; normalised before use
  # @return [Hash] :can_message, :restriction_reason, :source ("ui"),
  #   :dm_state, :dm_reason and :dm_retry_after_at
  # @raise [RuntimeError] when the normalised username is blank
  def verify_messageability_from_driver(driver, username:)
    handle = normalize_username(username)
    raise "Username cannot be blank" if handle.blank?

    with_task_capture(driver: driver, task_name: "profile_verify_messageability", meta: { username: handle }) do
      if open_dm(driver, handle)
        begin
          wait_for_present(driver, css: dm_textbox_css, timeout: 10)
          {
            can_message: true,
            restriction_reason: nil,
            source: "ui",
            dm_state: "messageable",
            dm_reason: "composer_visible",
            dm_retry_after_at: nil
          }
        rescue Selenium::WebDriver::Error::TimeoutError
          # The thread opened but no composer appeared within the timeout.
          {
            can_message: false,
            restriction_reason: "Unable to open message box",
            source: "ui",
            dm_state: "unavailable",
            dm_reason: "message_box_unavailable",
            dm_retry_after_at: Time.current + 12.hours
          }
        end
      else
        {
          can_message: false,
          restriction_reason: "Unable to open DM thread",
          source: "ui",
          dm_state: "unavailable",
          dm_reason: "unable_to_open_dm_thread",
          dm_retry_after_at: Time.current + 12.hours
        }
      end
    end
  end
  # Navigates to the profile page and clicks its "Message" call-to-action.
  #
  # The button is located by visible text first (waiting up to 10 seconds),
  # then by any displayed element whose aria-label contains "message".
  #
  # @param driver [Object] Selenium driver
  # @param username [String] profile username placed in the URL path
  # @return [Boolean] false when no button is found or the click fails
  def open_dm_from_profile(driver, username)
    driver.navigate.to("#{INSTAGRAM_BASE_URL}/#{username}/")
    wait_for(driver, css: "body", timeout: 10)
    dismiss_common_overlays!(driver)
    human_pause

    # XPath 1.0 has no lower-case(); translate() gives a case-insensitive contains("message").
    lowered_text = "translate(normalize-space(.), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz')"
    by_text = "//*[self::button or (self::div and @role='button') or self::a][contains(#{lowered_text}, 'message')]"
    by_aria = "//*[@aria-label and contains(translate(@aria-label,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'message')]"

    # Profile pages often render in stages, so wait for a displayed match
    # instead of taking the first node, which may still be hidden.
    cta =
      begin
        wait_for(driver, xpath: by_text, timeout: 10)
      rescue Selenium::WebDriver::Error::TimeoutError
        nil
      end

    unless cta
      cta = driver.find_elements(xpath: by_aria).find do |node|
        begin
          node.displayed?
        rescue StandardError
          false
        end
      end
    end
    return false unless cta

    clicked =
      begin
        driver.action.move_to(cta).click.perform
        true
      rescue StandardError
        # Fall back to a script click when the pointer action fails.
        js_click(driver, cta)
      end
    return false unless clicked

    maybe_capture_filmstrip(driver, label: "dm_open_profile_after_click")
    true
  end
  # Opens a DM thread with +username+, trying the profile-page button first
  # and the /direct/new/ flow second.
  #
  # @param driver [Object] Selenium driver
  # @param username [String] profile username; normalised before use
  # @return [Boolean] true when a strategy succeeds or the current URL is
  #   already a thread URL
  def open_dm(driver, username)
    handle = normalize_username(username)
    return false if handle.blank?

    # Strategy 1: profile page CTA.
    via_profile = with_task_capture(driver: driver, task_name: "dm_open_profile", meta: { username: handle }) do
      open_dm_from_profile(driver, handle)
    end
    if via_profile
      begin
        wait_for_dm_composer_or_thread!(driver, timeout: 12)
        return true
      rescue Selenium::WebDriver::Error::TimeoutError
        # Composer never appeared; fall through to the next strategy.
      end
    end

    # Strategy 2: direct/new flow (SPA-safe).
    via_direct = with_task_capture(driver: driver, task_name: "dm_open_direct_new", meta: { username: handle }) do
      open_dm_via_direct_new(driver, handle)
    end

    # On some IG builds the URL flips to the thread before the composer becomes queryable.
    via_direct ? true : driver.current_url.to_s.include?("/direct/t/")
  end
  # Opens a DM thread through the /direct/new/ recipient picker.
  #
  # Steps: navigate to /direct/new/, type the username into the first usable
  # search input, click the matching result row, click the "Next"/"Chat"
  # button, then wait for the composer or a thread URL.
  #
  # Returns true when the URL already contains "/direct/t/" after picking the
  # recipient, or when the final wait succeeds. Returns false when nothing
  # could be typed, no result appears, no continue button is found, or a
  # Selenium TimeoutError reaches the method-level rescue.
  #
  # NOTE(review): username is interpolated into XPath string literals without
  # escaping; this assumes the caller passes a value containing no quote
  # characters. Confirm against normalize_username.
  3488. def open_dm_via_direct_new(driver, username)
  3489. driver.navigate.to("#{INSTAGRAM_BASE_URL}/direct/new/")
  3490. wait_for(driver, css: "body", timeout: 12)
  3491. dismiss_common_overlays!(driver)
  3492. human_pause
  3493. # Find a search box for recipients.
  # Selectors are ordered from most to least specific; the last one matches any text input.
  3494. selectors = [
  3495. "input[name='queryBox']",
  3496. "input[placeholder*='Search']",
  3497. "input[aria-label*='Search']",
  3498. "input[type='text']"
  3499. ]
  3500. typed = false
  # Up to three typing attempts. Only stale / not-interactable errors trigger a retry;
  # when no input is found at all the loop exits immediately without retrying.
  3501. 3.times do |attempt|
  # Prefer a displayed input; otherwise fall back to the first match of any selector.
  3502. input =
  3503. selectors.lazy.map { |sel| driver.find_elements(css: sel).find(&:displayed?) }.find(&:present?) ||
  3504. selectors.lazy.map { |sel| driver.find_elements(css: sel).first }.find(&:present?)
  3505. break unless input
  3506. begin
  3507. input.click
  3508. # Clear any existing value.
  # NOTE(review): select-all is sent as Control+A; confirm this matches the browser's platform.
  3509. input.send_keys([:control, "a"])
  3510. input.send_keys(:backspace)
  3511. input.send_keys(username)
  3512. typed = true
  3513. human_pause
  3514. break
  3515. rescue Selenium::WebDriver::Error::StaleElementReferenceError, Selenium::WebDriver::Error::ElementNotInteractableError
  3516. Rails.logger.info("open_dm_via_direct_new retry typing (attempt #{attempt + 1}/3)")
  3517. sleep(0.5)
  3518. next
  3519. end
  3520. end
  3521. return false unless typed
  3522. capture_task_html(driver: driver, task_name: "dm_open_direct_new_after_type", status: "ok", meta: { username: username })
  3523. # Wait for the username to appear in results and click it.
  3524. username_down = username.to_s.downcase
  # translate() lower-cases the node text so contains() is case-insensitive.
  3525. ci = "translate(normalize-space(.), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz')"
  3526. row_xpath = "//div[@role='button'][.//*[contains(#{ci}, '#{username_down}')]]"
  3527. row_with_img_xpath = "//div[@role='button'][.//*[contains(#{ci}, '#{username_down}')]]//img/ancestor::div[@role='button'][1]"
  3528. begin
  3529. Selenium::WebDriver::Wait.new(timeout: 12).until do
  3530. driver.find_elements(xpath: row_with_img_xpath).any? ||
  3531. driver.find_elements(xpath: row_xpath).any? ||
  3532. driver.find_elements(xpath: "//*[contains(#{ci}, '#{username_down}')]").any?
  3533. end
  3534. rescue Selenium::WebDriver::Error::TimeoutError
  3535. return false
  3536. end
  # Candidate priority: displayed row containing an image, displayed row, any row,
  # displayed element containing the username, any element containing the username.
  3537. candidate =
  3538. driver.find_elements(xpath: row_with_img_xpath).find { |el| el.displayed? rescue false } ||
  3539. driver.find_elements(xpath: row_xpath).find { |el| el.displayed? rescue false } ||
  3540. driver.find_elements(xpath: row_xpath).first ||
  3541. driver.find_elements(xpath: "//*[contains(#{ci}, '#{username_down}')]").find { |el| el.displayed? rescue false } ||
  3542. driver.find_elements(xpath: "//*[contains(#{ci}, '#{username_down}')]").first
  3543. return false unless candidate
  3544. # Click nearest clickable container; otherwise click the text node parent.
  3545. clickable =
  3546. begin
  3547. driver.execute_script(<<~JS, candidate)
  3548. const el = arguments[0];
  3549. // For direct/new, the row itself is usually role=button.
  3550. if (el && el.getAttribute && el.getAttribute("role") === "button") return el;
  3551. const btn = el.closest("button,[role='button']");
  3552. return btn || el;
  3553. JS
  3554. rescue StandardError
  3555. candidate
  3556. end
  # Pointer-action click first; script click when the action fails.
  3557. begin
  3558. driver.action.move_to(clickable).click.perform
  3559. rescue StandardError
  3560. js_click(driver, clickable)
  3561. end
  3562. human_pause
  3563. capture_task_html(driver: driver, task_name: "dm_open_direct_new_after_pick", status: "ok", meta: { username: username })
  3564. # Click the continuation CTA to open chat ("Next" on some builds, "Chat" on others).
  3565. continue_btn = nil
  3566. begin
  # Exact-label matches are tried before case-insensitive substring matches;
  # the wait only succeeds once the button is also enabled.
  3567. Selenium::WebDriver::Wait.new(timeout: 12).until do
  3568. continue_btn =
  3569. driver.find_elements(xpath: "//*[self::button or (self::div and @role='button')][normalize-space()='Next']").find(&:displayed?) ||
  3570. driver.find_elements(xpath: "//*[self::button or (self::div and @role='button')][normalize-space()='Chat']").find(&:displayed?) ||
  3571. driver.find_elements(xpath: "//*[self::button or (self::div and @role='button')][contains(translate(normalize-space(.),'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'next')]").find(&:displayed?) ||
  3572. driver.find_elements(xpath: "//*[self::button or (self::div and @role='button')][contains(translate(normalize-space(.),'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'chat')]").find(&:displayed?)
  3573. continue_btn.present? && element_enabled?(continue_btn)
  3574. end
  3575. rescue Selenium::WebDriver::Error::TimeoutError
  # A button that was found but never became enabled is discarded here.
  3576. continue_btn = nil
  3577. end
  3578. # Some UI variants jump directly into the thread immediately after recipient selection.
  3579. return true if driver.current_url.to_s.include?("/direct/t/")
  3580. return false unless continue_btn
  3581. begin
  3582. driver.action.move_to(continue_btn).click.perform
  3583. rescue StandardError
  3584. js_click(driver, continue_btn)
  3585. end
  3586. maybe_capture_filmstrip(driver, label: "dm_open_direct_new_after_next")
  3587. capture_task_html(driver: driver, task_name: "dm_open_direct_new_after_next", status: "ok", meta: { username: username })
  3588. wait_for_dm_composer_or_thread!(driver, timeout: 16)
  3589. true
  # Covers the final composer wait and any other TimeoutError not rescued above.
  3590. rescue Selenium::WebDriver::Error::TimeoutError
  3591. false
  3592. end
  # Blocks until the browser is on a DM thread URL or a role=textbox element
  # exists, treating the inbox URL as "not opened yet".
  #
  # @param driver [Object] Selenium driver
  # @param timeout [Numeric] seconds handed to Selenium::WebDriver::Wait
  # @return [Boolean] true once the condition holds
  # @raise [Selenium::WebDriver::Error::TimeoutError] when the wait expires
  def wait_for_dm_composer_or_thread!(driver, timeout:)
    waiter = Selenium::WebDriver::Wait.new(timeout: timeout)

    waiter.until do
      location = driver.current_url.to_s

      if location.include?("/direct/inbox")
        # Some failures bounce back to the inbox; keep waiting.
        false
      elsif location.include?("/direct/t/")
        true
      else
        driver.find_elements(css: "div[role='textbox']").any?
      end
    end
  end
  # Reports whether an element looks clickable: displayed, with no "disabled"
  # attribute and with aria-disabled other than "true".
  #
  # Lookup errors are treated leniently: a failing displayed? check counts as
  # displayed and a failing attribute read counts as absent.
  #
  # @param el [Object, nil] element responding to displayed? and attribute
  # @return [Boolean] false for nil, hidden or disabled elements
  def element_enabled?(el)
    return false unless el

    shown =
      begin
        el.displayed?
      rescue StandardError
        true
      end
    return false unless shown

    attr_text = lambda do |attr_name|
      raw =
        begin
          el.attribute(attr_name)
        rescue StandardError
          nil
        end
      raw.to_s
    end

    disabled_value = attr_text.call("disabled")
    aria_value = attr_text.call("aria-disabled")
    disabled_value.blank? && aria_value != "true"
  rescue StandardError
    true
  end
  # Sleeps for a random duration between the two bounds to mimic human pacing.
  #
  # The duration is clamped to 0..2 seconds. Nothing happens when
  # +max_seconds+ is zero or negative.
  #
  # @param min_seconds [Numeric] lower bound in seconds
  # @param max_seconds [Numeric] upper bound in seconds
  # @return [Integer, nil] the value returned by sleep, or nil when skipped
  def human_pause(min_seconds = 0.15, max_seconds = 0.55)
    upper = max_seconds.to_f
    return if upper <= 0

    lower = min_seconds.to_f
    jitter = rand * (upper - lower)
    sleep((lower + jitter).clamp(0.0, 2.0))
  end
  # Captures a short series of screenshots ("filmstrip") plus a JSON metadata
  # file, for debugging. Runs only when ENV["INSTAGRAM_FILMSTRIP"] is set.
  #
  # Files are written to DEBUG_CAPTURE_DIR/<YYYYMMDD>/. The same sanitised
  # label is used for the frame files and the metadata file, so a label with
  # path separators or spaces can no longer make the metadata write fail.
  # The original label is kept unchanged inside the JSON.
  #
  # Best effort: screenshot failures are skipped and any other error makes
  # the method return nil.
  #
  # @param driver [#save_screenshot] browser driver
  # @param label [String] human-readable tag for this capture
  # @param seconds [Numeric] total capture window in seconds
  # @param interval [Numeric] pause between frames in seconds
  # @return [Object, nil] result of File.write, or nil when disabled or on error
  def maybe_capture_filmstrip(driver, label:, seconds: 5.0, interval: 0.5)
    return unless ENV["INSTAGRAM_FILMSTRIP"].present?

    root = DEBUG_CAPTURE_DIR.join(Time.current.utc.strftime("%Y%m%d"))
    FileUtils.mkdir_p(root)

    # Computed once: the label does not change between frames.
    safe = label.to_s.downcase.gsub(/[^a-z0-9]+/, "_").gsub(/\A_|_\z/, "")
    started = Time.current.utc
    deadline = started + seconds.to_f
    frames = []
    index = 0

    while Time.current.utc < deadline
      ts = Time.current.utc.strftime("%Y%m%dT%H%M%S.%LZ")
      path = root.join("#{ts}_filmstrip_#{safe}_#{format('%03d', index)}.png")
      begin
        driver.save_screenshot(path.to_s)
        frames << path.to_s
      rescue StandardError
        # best effort: a failed frame is skipped
      end
      index += 1
      sleep(interval.to_f)
    end

    meta = {
      timestamp: Time.current.utc.iso8601(3),
      label: label,
      seconds: seconds,
      interval: interval,
      frames: frames
    }
    meta_path = root.join("#{started.strftime('%Y%m%dT%H%M%S.%LZ')}_filmstrip_#{safe}.json")
    File.write(meta_path, JSON.pretty_generate(meta))
  rescue StandardError
    nil
  end
  # Waits until an element matching the locator is displayed and returns it.
  #
  # +css+ takes precedence when both locators are given. Elements that go
  # stale while being checked are skipped. With neither locator the wait can
  # only time out.
  #
  # @param driver [Object] Selenium driver
  # @param css [String, nil] CSS selector
  # @param xpath [String, nil] XPath expression, used only when css is not given
  # @param timeout [Numeric] seconds handed to Selenium::WebDriver::Wait
  # @return [Object] the first displayed matching element
  # @raise [Selenium::WebDriver::Error::TimeoutError] when the wait expires
  def wait_for(driver, css: nil, xpath: nil, timeout: 10)
    locator =
      if css
        { css: css }
      elsif xpath
        { xpath: xpath }
      end

    Selenium::WebDriver::Wait.new(timeout: timeout).until do
      next nil unless locator

      driver.find_elements(**locator).find do |node|
        node.displayed?
      rescue Selenium::WebDriver::Error::StaleElementReferenceError
        false
      end
    end
  end
  # Waits until at least one element matching the locator exists in the DOM,
  # whether or not it is displayed.
  #
  # @param driver [Object] Selenium driver
  # @param css [String, nil] CSS selector; takes precedence over xpath
  # @param xpath [String, nil] XPath expression
  # @param timeout [Numeric] seconds handed to Selenium::WebDriver::Wait
  # @return [Boolean] true once a match exists
  # @raise [Selenium::WebDriver::Error::TimeoutError] when the wait expires
  def wait_for_present(driver, css: nil, xpath: nil, timeout: 10)
    Selenium::WebDriver::Wait.new(timeout: timeout).until do
      next driver.find_elements(css: css).any? if css
      next driver.find_elements(xpath: xpath).any? if xpath

      nil
    end
  end
  # CSS selector list for the DM composer.
  #
  # The composer is a contenteditable div (Lexical editor). Some builds render
  # several role=textbox nodes (hidden and visible), so the contenteditable
  # form is listed first and the bare role selector second.
  #
  # @return [String] comma-separated selector list
  def dm_textbox_css
    editable = "div[role='textbox'][contenteditable='true']"
    any_textbox = "div[role='textbox']"

    [editable, any_textbox].join(", ")
  end
  # Types +message_text+ into the open DM composer, submits it, and confirms
  # delivery through verify_dm_send.
  #
  # Typing and submitting are attempted up to three times:
  #   * a stale composer element is re-located and the attempt repeated;
  #   * a not-interactable or invalid-state element triggers a fallback that
  #     types into whichever element has focus and then ends the loop;
  #   * any other error is retried, and re-raised on the third attempt.
  # Submission prefers the Send button and falls back to the Enter key.
  #
  # Returns true when verify_dm_send reports success.
  # Raises RuntimeError for a blank message, for a detected websocket TLS
  # problem (with guidance text) and when the send cannot be confirmed;
  # raises Selenium::WebDriver::Error::NoSuchElementError when no composer is found.
  #
  # NOTE(review): if all three attempts end in a stale-element retry, the loop
  # finishes without sending and verification then reports the failure.
  3690. def send_text_message_from_driver!(driver, message_text, expected_username: nil)
  3691. raise "Message cannot be blank" if message_text.to_s.strip.blank?
  3692. css = dm_textbox_css
  3693. wait_for_present(driver, css: css, timeout: 12)
  3694. box = find_visible_dm_textbox(driver)
  3695. raise Selenium::WebDriver::Error::NoSuchElementError, "No DM textbox found" unless box
  3696. 3.times do |attempt|
  # Bring the composer into view and focus it via script before any native interaction.
  3697. begin
  3698. driver.execute_script("arguments[0].scrollIntoView({block: 'center', inline: 'nearest'});", box)
  3699. driver.execute_script("arguments[0].focus();", box)
  3700. driver.execute_script("arguments[0].click();", box)
  3701. rescue StandardError
  3702. # best effort
  3703. end
  3704. begin
  3705. box.click
  3706. rescue Selenium::WebDriver::Error::ElementClickInterceptedError, Selenium::WebDriver::Error::ElementNotInteractableError
  3707. # ignore; we'll try typing via actions as a fallback
  3708. end
  3709. begin
  3710. # Clear any residual draft text (best effort).
  # This also removes text left behind by an earlier attempt that failed after typing.
  3711. begin
  3712. driver.action.click(box).key_down(:control).send_keys("a").key_up(:control).send_keys(:backspace).perform
  3713. rescue StandardError
  3714. nil
  3715. end
  3716. # Type using actions (more reliable on IG's Lexical composer than direct send_keys on the element).
  3717. driver.action.click(box).send_keys(message_text.to_s).perform
  # The composer text is read back only for the debug capture; it is not compared to the message.
  3718. typed = read_dm_textbox_text(driver)
  3719. capture_task_html(
  3720. driver: driver,
  3721. task_name: "dm_send_text_after_type",
  3722. status: "ok",
  3723. meta: { expected_username: expected_username, message_preview: message_text.to_s.strip.byteslice(0, 80), textbox_text_preview: typed.to_s.byteslice(0, 120) }
  3724. )
  3725. # Prefer clicking "Send" first. Recent IG builds sometimes clear the composer on Enter even when
  3726. # the message never actually sends (silent failure), so Enter-first can give a false sense of success.
  3727. clicked_send = click_dm_send_button(driver, textbox: box)
  3728. capture_task_html(
  3729. driver: driver,
  3730. task_name: "dm_send_text_after_send_click",
  3731. status: "ok",
  3732. meta: { expected_username: expected_username, message_preview: message_text.to_s.strip.byteslice(0, 80), clicked_send: clicked_send }
  3733. )
  3734. # If we could not click the Send button, attempt Enter as a fallback.
  3735. enter_attempted = false
  3736. if !(clicked_send.is_a?(Hash) && clicked_send[:clicked])
  3737. begin
  3738. box.send_keys(:enter)
  3739. enter_attempted = true
  3740. rescue StandardError
  3741. enter_attempted = false
  3742. end
  3743. end
  3744. after_enter_text = read_dm_textbox_text(driver)
  3745. capture_task_html(
  3746. driver: driver,
  3747. task_name: "dm_send_text_after_enter",
  3748. status: "ok",
  3749. meta: {
  3750. expected_username: expected_username,
  3751. message_preview: message_text.to_s.strip.byteslice(0, 80),
  3752. enter_attempted: enter_attempted,
  3753. textbox_text_preview: after_enter_text.to_s.byteslice(0, 120),
  3754. send_button_clicked: (clicked_send.is_a?(Hash) ? clicked_send[:clicked] : nil),
  3755. send_button_reason: (clicked_send.is_a?(Hash) ? clicked_send[:reason] : nil)
  3756. }
  3757. )
  # "sent" only means a submit action was issued; delivery is confirmed after the loop.
  3758. sent = (clicked_send.is_a?(Hash) ? clicked_send[:clicked] : !!clicked_send) || enter_attempted
  3759. unless sent
  3760. # Last resort.
  3761. driver.action.send_keys(:enter).perform
  3762. end
  3763. break
  3764. rescue Selenium::WebDriver::Error::StaleElementReferenceError
  # The composer was re-rendered: locate it again and repeat the attempt.
  3765. sleep(0.4)
  3766. box = find_visible_dm_textbox(driver)
  3767. next
  3768. rescue Selenium::WebDriver::Error::ElementNotInteractableError, Selenium::WebDriver::Error::InvalidElementStateError
  3769. # Fallback: send keys to the active element (Instagram's Lexical editor usually focuses it).
  3770. driver.action.send_keys(message_text.to_s).perform
  3771. tb = find_visible_dm_textbox(driver)
  3772. click_dm_send_button(driver, textbox: tb).to_h[:clicked] || driver.action.send_keys(:enter).perform
  3773. break
  3774. rescue StandardError
  # attempt is zero-based, so this re-raises on the third and final attempt.
  3775. raise if attempt >= 2
  3776. sleep(0.6)
  3777. next
  3778. end
  3779. end
  3780. verify = verify_dm_send(driver, message_text.to_s, expected_username: expected_username)
  3781. return true if verify[:ok]
  3782. if verify[:reason].to_s.start_with?("websocket_tls_error")
  3783. raise websocket_tls_guidance(verify)
  3784. end
  3785. # Force a debug capture even though the caller will also capture on error.
  3786. capture_task_html(driver: driver, task_name: "dm_send_text_verify", status: "error", meta: verify)
  3787. raise "Message not confirmed as sent (#{verify[:reason]})"
  3788. end
  # Locates the DM composer element.
  #
  # Contenteditable role=textbox nodes are searched first, then any
  # role=textbox node. The first displayed candidate wins; when none is
  # displayed (or they are stale) the first candidate is returned.
  #
  # @param driver [Object] Selenium driver
  # @return [Object, nil] the composer element, or nil when nothing matches
  def find_visible_dm_textbox(driver)
    boxes = driver.find_elements(css: "div[role='textbox'][contenteditable='true']")
    boxes = driver.find_elements(css: "div[role='textbox']") if boxes.empty?

    shown = lambda do |node|
      node.displayed?
    rescue Selenium::WebDriver::Error::StaleElementReferenceError
      false
    end

    boxes.find(&shown) || boxes.first
  end
  # Reads the current text of the DM composer through an in-page script.
  #
  # @param driver [#execute_script] browser driver
  # @return [String, nil] the composer's innerText ("" when empty); nil when
  #   no composer exists or the script call raises
  def read_dm_textbox_text(driver)
    script = <<~JS
      const textbox =
      document.querySelector("div[role='textbox'][contenteditable='true']") ||
      document.querySelector("div[role='textbox']");
      if (!textbox) return null;
      return (textbox.innerText || "").toString();
    JS

    driver.execute_script(script)
  rescue StandardError
    nil
  end
  # Confirms that a just-submitted DM appears in the open thread.
  #
  # Polls the page up to 40 times with a 0.75 s pause between polls. Each poll
  # runs an in-page script that reports whether the composer is empty, whether
  # an element outside the composer contains the message text, whether the
  # page links to the expected profile, and whether error or message-request
  # text is visible.
  #
  # Returns a Hash with :ok and :reason; every result except the blank-message
  # and exception ones also has :details (the last script result). The
  # websocket-TLS and final-timeout results add :expected_username and
  # :message_preview, and the websocket-TLS result also carries :tls_issue.
  # Success needs an empty composer plus a matching bubble and, when
  # expected_username is given, a thread match that is true or undetermined.
  # Never raises: exceptions become { ok: false, reason: "verify_exception ..." }.
  3811. def verify_dm_send(driver, message_text, expected_username: nil)
  3812. needle = message_text.to_s.strip
  3813. return { ok: false, reason: "blank message_text" } if needle.blank?
  3814. # Poll briefly because the UI can take a moment to append the outgoing bubble.
  3815. last = nil
  3816. 40.times do |i|
  3817. # Try to keep the message list near the bottom so the newest outgoing bubble is mounted.
  3818. begin
  3819. driver.execute_script(<<~JS)
  3820. const main =
  3821. document.querySelector("div[role='main']") ||
  3822. document.scrollingElement ||
  3823. document.documentElement ||
  3824. document.body;
  3825. try { main.scrollTop = 1e9; } catch (e) {}
  3826. try { window.scrollTo(0, document.body.scrollHeight); } catch (e) {}
  3827. JS
  3828. rescue StandardError
  3829. nil
  3830. end
  # The heredoc processes backslash escapes, so "\\s" below reaches the browser as "\s".
  # None of the page-text checks lower-case their input, so they are case-sensitive.
  3831. last = driver.execute_script(<<~JS, needle, expected_username.to_s)
  3832. const needle = (arguments[0] || "").replace(/\\s+/g, " ").trim();
  3833. const expected = (arguments[1] || "").toLowerCase().trim();
  3834. const norm = (s) => (s || "").replace(/\\s+/g, " ").trim();
  3835. const textbox =
  3836. document.querySelector("div[role='textbox'][contenteditable='true']") ||
  3837. document.querySelector("div[role='textbox']");
  3838. const textboxText = textbox ? norm(textbox.innerText) : null;
  3839. const textboxEmpty = !textboxText || textboxText.length === 0;
  3840. // Common send failure surface text (best effort).
  3841. const bodyText = norm(document.body && document.body.innerText);
  3842. const sendError =
  3843. bodyText.includes("couldn't send") ||
  3844. bodyText.includes("could not send") ||
  3845. bodyText.includes("try again") && bodyText.includes("message");
  3846. const messageRequestInterstitial =
  3847. bodyText.includes("message request") ||
  3848. bodyText.includes("message requests") ||
  3849. (bodyText.includes("allow") && bodyText.includes("decline") && bodyText.includes("message"));
  3850. const alertTexts = Array.from(document.querySelectorAll("[role='alert'],[aria-live='polite'],[aria-live='assertive']"))
  3851. .map((n) => norm(n && (n.innerText || n.textContent)))
  3852. .filter((t) => t && t.length > 0)
  3853. .slice(0, 10);
  3854. // Best-effort: try to validate we are in the intended thread.
  3855. let threadMatches = null;
  3856. if (expected) {
  3857. const hrefs = Array.from(document.querySelectorAll("a[href^='/']"))
  3858. .map((a) => (a.getAttribute("href") || "").toLowerCase());
  3859. threadMatches = hrefs.some((h) => h === `/${expected}/` || h.startsWith(`/${expected}/`));
  3860. }
  3861. const nodes = Array.from(document.querySelectorAll(
  3862. "div[role='row'], div[role='listitem'], [dir='auto'], span[data-lexical-text='true']"
  3863. ));
  3864. let bubbleFound = false;
  3865. for (let i = nodes.length - 1; i >= 0 && i >= nodes.length - 400; i--) {
  3866. const n = nodes[i];
  3867. if (!n) continue;
  3868. if (textbox && (textbox === n || textbox.contains(n) || n.contains(textbox))) continue;
  3869. const t = norm(n.textContent || n.innerText);
  3870. const a = norm(n.getAttribute && n.getAttribute("aria-label"));
  3871. const combined = (t + " " + a).trim();
  3872. if (combined && combined.includes(needle)) { bubbleFound = true; break; }
  3873. }
  3874. return { textboxEmpty, textboxText, bubbleFound, threadMatches, sendError, messageRequestInterstitial, alertTexts };
  3875. JS
  # Visible error or message-request text ends the poll immediately with a failure.
  3876. if last.is_a?(Hash) && last["sendError"] == true
  3877. return { ok: false, reason: "send_error_visible", details: last }
  3878. end
  3879. if last.is_a?(Hash) && last["messageRequestInterstitial"] == true
  3880. return { ok: false, reason: "message_request_interstitial_visible", details: last }
  3881. end
  3882. if last.is_a?(Hash) && last["textboxEmpty"] == true && last["bubbleFound"] == true
  3883. # If we can determine threadMatches, require it; otherwise accept.
  # When threadMatches is false, nothing is returned here and polling continues.
  3884. if expected_username.to_s.strip.present?
  3885. tm = last["threadMatches"]
  3886. if tm.nil? || tm == true
  3887. return { ok: true, reason: "verified", details: last }
  3888. end
  3889. else
  3890. return { ok: true, reason: "verified", details: last }
  3891. end
  3892. end
  3893. sleep(0.75)
  3894. # Fail fast if DM transport is broken at the browser/network layer.
  # Checked on every fourth poll, starting with the first (i == 0).
  3895. if (i % 4).zero?
  3896. tls = detect_websocket_tls_issue(driver)
  3897. if tls[:found]
  3898. return {
  3899. ok: false,
  3900. reason: "websocket_tls_error #{tls[:reason]}",
  3901. tls_issue: tls,
  3902. details: last,
  3903. expected_username: expected_username,
  3904. message_preview: needle.byteslice(0, 80)
  3905. }
  3906. end
  3907. end
  3908. # One refresh mid-way can help when the UI doesn't mount the most recent bubble immediately.
  3909. if i == 10
  3910. begin
  3911. driver.navigate.refresh
  3912. wait_for(driver, css: "body", timeout: 10)
  3913. rescue StandardError
  3914. nil
  3915. end
  3916. end
  3917. end
  # Polling exhausted: check for a TLS problem once more before reporting an unconfirmed send.
  3918. tls = detect_websocket_tls_issue(driver)
  3919. if tls[:found]
  3920. return {
  3921. ok: false,
  3922. reason: "websocket_tls_error #{tls[:reason]}",
  3923. tls_issue: tls,
  3924. details: last,
  3925. expected_username: expected_username,
  3926. message_preview: needle.byteslice(0, 80)
  3927. }
  3928. end
  3929. # If we couldn't find the bubble, but the textbox is empty, treat as "unknown" rather than success.
  3930. textbox_empty = last.is_a?(Hash) ? last["textboxEmpty"] : nil
  3931. bubble = last.is_a?(Hash) ? last["bubbleFound"] : nil
  3932. thread = last.is_a?(Hash) ? last["threadMatches"] : nil
  3933. {
  3934. ok: false,
  3935. reason: "textbox_empty=#{textbox_empty.inspect} bubble_found=#{bubble.inspect} thread_matches=#{thread.inspect} message_request_interstitial=#{last.is_a?(Hash) ? last['messageRequestInterstitial'].inspect : 'nil'}",
  3936. details: last,
  3937. expected_username: expected_username,
  3938. message_preview: needle.byteslice(0, 80)
  3939. }
  3940. rescue StandardError => e
  3941. { ok: false, reason: "verify_exception #{e.class}: #{e.message}" }
  3942. end
  # Builds the user-facing error text for a websocket TLS failure.
  #
  # @param verify [Hash] verification result; :tls_issue may hold a :reason
  # @return [String] explanation and remediation hint, using
  #   "certificate validation error" when no reason is available
  def websocket_tls_guidance(verify)
    issue = verify[:tls_issue].to_h
    cause = issue[:reason].presence || "certificate validation error"

    [
      "Instagram DM transport failed: #{cause}.",
      "Chrome could not establish a trusted secure connection to Instagram chat endpoints.",
      "Install/trust the system CA used by your network proxy or, for local debugging only,",
      "set INSTAGRAM_CHROME_IGNORE_CERT_ERRORS=true and retry."
    ].join(" ")
  end
  # Scans the browser console log for certificate-authority errors.
  #
  # A failing TLS handshake on the IG Direct gateway websocket can stop DMs
  # from being delivered even though the UI clears the composer, so an error
  # mentioning the gateway websocket is reported in preference to any other
  # certificate error.
  #
  # @param driver [Object] driver; must respond to #logs for anything to be checked
  # @return [Hash] { found: false }, or { found: true, reason:, message: }
  #   with the message cut to 2000 bytes; { found: false, error: } when
  #   reading the entries raises
  def detect_websocket_tls_issue(driver)
    return { found: false } unless driver.respond_to?(:logs)

    entries =
      begin
        driver.logs.get(:browser)
      rescue StandardError
        []
      end

    cert_errors = Array(entries)
      .map { |entry| entry.message.to_s }
      .select { |text| text.include?("ERR_CERT_AUTHORITY_INVALID") }

    hit = cert_errors.find { |text| text.include?("gateway.instagram.com/ws/streamcontroller") } || cert_errors.first
    return { found: false } unless hit

    { found: true, reason: "ERR_CERT_AUTHORITY_INVALID", message: hit.to_s.byteslice(0, 2000) }
  rescue StandardError => e
    { found: false, error: "#{e.class}: #{e.message}" }
  end
  # Finds the "Send" control near the DM composer and clicks it.
  #
  # The page script walks up to 10 ancestors of the textbox looking for an element
  # whose aria-label mentions "Send", tags it with data-codex-send-btn="1", and
  # reports what it found. The tagged element is then clicked through WebDriver
  # actions, falling back to js_click when that raises.
  #
  # driver  - WebDriver session.
  # textbox - composer element; when nil nothing is attempted.
  #
  # Returns a Hash with :clicked and :reason, plus :aria_label and
  # :outer_html_preview once the script has reported on a button. Any exception
  # is converted into { clicked: false, reason: "send_click_exception ..." }.
  def click_dm_send_button(driver, textbox: nil)
    return { clicked: false, reason: "no_textbox" } unless textbox

    # Mark the send button in-DOM so we can click it via WebDriver actions (more reliable than JS click).
    marker_script = <<~JS
      const textbox = arguments[0];
      if (!textbox) return { marked: false, reason: "no_textbox" };
      // Clear previous marks (best effort).
      try {
        document.querySelectorAll("[data-codex-send-btn='1']").forEach((n) => n.removeAttribute("data-codex-send-btn"));
      } catch (e) {}
      const isVisible = (el) => {
        if (!el) return false;
        const style = window.getComputedStyle(el);
        if (style.display === "none" || style.visibility === "hidden" || style.opacity === "0") return false;
        const r = el.getBoundingClientRect();
        return (r.width > 0 && r.height > 0);
      };
      const selectors = [
        "[role='button'][aria-label='Send']",
        "[role='button'][aria-label*='Send']",
        "button[aria-label='Send']",
        "button[aria-label*='Send']",
        "svg[aria-label='Send']",
        "svg[aria-label*='Send']"
      ];
      let root = textbox;
      for (let depth = 0; depth < 10 && root; depth++) {
        let candidate = null;
        for (const sel of selectors) {
          const el = root.querySelector ? root.querySelector(sel) : null;
          if (el) { candidate = el; break; }
        }
        if (candidate) {
          let button = candidate;
          if (button && button.tagName && button.tagName.toLowerCase() === "svg") {
            button = button.closest("button,[role='button']") || button;
          }
          const preview = (button && button.outerHTML ? button.outerHTML : "").slice(0, 900);
          const ariaLabel = button && button.getAttribute ? button.getAttribute("aria-label") : null;
          if (!button) return { marked: false, reason: "send_button_null" };
          if (!isVisible(button)) return { marked: false, reason: "send_button_not_visible", ariaLabel, outerHTMLPreview: preview };
          try { button.setAttribute("data-codex-send-btn", "1"); } catch (e) {}
          return { marked: true, ariaLabel, outerHTMLPreview: preview };
        }
        root = root.parentElement;
      }
      return { marked: false, reason: "send_button_not_found_near_textbox" };
    JS

    raw = driver.execute_script(marker_script, textbox)
    raw = raw.to_h if raw.respond_to?(:to_h)
    return { clicked: false, reason: "unexpected_js_return: #{raw.class}" } unless raw.is_a?(Hash)

    # The script result arrives with string keys; symbolize them for lookup.
    outcome = raw.transform_keys { |key| key.to_s.to_sym }
    button_info = { aria_label: outcome[:ariaLabel], outer_html_preview: outcome[:outerHTMLPreview] }

    unless outcome[:marked]
      failure = { clicked: false, reason: outcome[:reason] || "send_button_not_marked" }
      return failure.merge(button_info)
    end

    button = driver.find_element(css: "[data-codex-send-btn='1']")
    begin
      driver.action.move_to(button).click.perform
    rescue StandardError
      js_click(driver, button)
    end

    # Clean up the mark to avoid confusing later steps.
    begin
      driver.execute_script("arguments[0].removeAttribute('data-codex-send-btn');", button)
    rescue StandardError
      nil
    end

    { clicked: true, reason: "clicked" }.merge(button_info)
  rescue StandardError => e
    { clicked: false, reason: "send_click_exception #{e.class}: #{e.message}" }
  end
  # Canonicalizes a username: stringifies the value, trims it, lowercases it and
  # drops every character other than a-z, 0-9, "." and "_".
  #
  # Returns a String (empty when nothing usable remains).
  def normalize_username(value)
    lowered = value.to_s.strip.downcase
    lowered.delete("^a-z0-9._")
  end
  # Converts a value to an Integer only when its trimmed string form consists
  # solely of digits.
  #
  # Returns the Integer, or nil for anything else (including any error raised
  # while converting).
  def normalize_count(value)
    digits = value.to_s.strip
    digits.match?(/\A\d+\z/) ? digits.to_i : nil
  rescue StandardError
    nil
  end
  # Pulls follower/following counts out of a profile page's description attribute.
  #
  # Best-effort; depends on English locale. Example of the text being matched:
  #   "246 Followers, 661 Following, 37 Posts - See Instagram photos..."
  #
  # Returns { followers: Integer, following: Integer } with thousands separators
  # removed, or nil when the pattern is absent or anything raises.
  def extract_profile_follow_counts(html)
    found = html.to_s.match(/content=\"\s*([\d,]+)\s*Followers,\s*([\d,]+)\s*Following\b/i)
    return nil unless found

    followers_text, following_text = found.captures
    {
      followers: followers_text.to_s.delete(",").to_i,
      following: following_text.to_s.delete(",").to_i
    }
  rescue StandardError
    nil
  end
  # Extracts conversation partners from the Lightspeed payload embedded in the
  # inbox page. Example structure:
  #   ... "verifyContactRowExists", ... , "Display Name", ... , "username", [9], [9]]]
  #
  # DOM selectors are avoided because the inbox is frequently rendered as
  # role="button" rows and the username often only appears inside embedded payloads.
  #
  # Returns [users, verify_segments]: users maps a normalized username to
  # { display_name: String }, verify_segments is the number of payload segments
  # found. Whatever was collected before an error is still returned.
  def extract_conversation_users_from_inbox_html(html)
    users = {}
    verify_segments = 0
    return [users, verify_segments] if html.blank?

    # In many builds the payload is itself a JSON-encoded string, so quotes appear as \"...\".
    # Try that form first and fall back to plain quotes.
    segments = html.scan(/\\\"verifyContactRowExists\\\"[\s\S]{0,4000}?\[9\],\s*\[9\]\]\]/)
    if segments.empty?
      segments = html.scan(/"verifyContactRowExists"[\s\S]{0,4000}?\[9\],\s*\[9\]\]\]/)
    end
    verify_segments += segments.length

    segments.each do |segment|
      escaped = segment.include?("\\\"")

      # Candidate usernames appear lowercase in this payload (usernames are
      # case-insensitive but stored normalized); the last one in the segment wins.
      token_re = escaped ? /\\\"([A-Za-z0-9._]{1,30})\\\"/ : /"([A-Za-z0-9._]{1,30})"/
      username = segment.scan(token_re).flatten.reverse_each.find do |token|
        token == token.downcase && token.match?(/\A[a-z0-9._]{1,30}\z/)
      end.to_s
      next if username.blank?

      # The display name is the last quoted value that is not a URL/path, not a
      # username-looking token, and not one of the known payload keywords.
      display_re = escaped ? /\\\"([^\\\"]{1,80})\\\"/ : /"([^"]{1,80})"/
      display = segment.scan(display_re).flatten.reverse_each.find do |text|
        !text.blank? &&
          !(text.include?("/") || text.match?(%r{\Ahttps?://}i)) &&
          !text.match?(/\A[a-z0-9._]{1,30}\z/) &&
          !(text.match?(/\Amessaging\b/i) || text.match?(/\blightspeed\b/i) || text.match?(/\bmedia_fallback\b/i))
      end

      users[normalize_username(username)] ||= { display_name: display.presence || username }
    end

    [users, verify_segments]
  rescue StandardError
    [users, verify_segments]
  end
  # Extracts usernames that appear to have stories from the home page HTML.
  #
  # Strategy:
  #   1. Look for the first known story-tray marker (checked in priority order)
  #      and restrict parsing to the text that follows it.
  #   2. Inside that window prefer tray items ("user":{... "username" ... "uuid"),
  #      otherwise take every "username" in the window.
  #   3. When no marker exists, scan the whole document with looser story
  #      patterns and return at most 12 usernames.
  #
  # Returns an Array of normalized, unique usernames; [] for blank input or when
  # anything raises (the error is logged when Rails is defined).
  def extract_story_users_from_home_html(html)
    return [] if html.blank?

    # Try multiple preloader patterns with more aggressive matching
    markers = [
      "adp_PolarisStoriesV3TrayContainerQueryRelayPreloader_",
      "adp_PolarisStoriesV",
      "StoriesTrayContainer",
      "stories_tray",
      "story-tray",
      "StoryTray",
      "storyTray",
      "stories-container",
      "storiesContainer"
    ]
    start = nil
    markers.each do |marker|
      start = html.index(marker)
      break if start
    end

    # If no preloader found, try direct username extraction from the entire HTML
    if start.nil?
      story_patterns = [
        /\"username\":\"([A-Za-z0-9._]{1,30})\"[\s\S]{0,1000}\"has_story\":true/,
        /\"user\":\{[\s\S]{0,2000}\"username\":\"([A-Za-z0-9._]{1,30})\"[\s\S]{0,2000}\"has_?story\":\s*true/,
        /\"([A-Za-z0-9._]{1,30})\"[\s\S]{0,500}\"story\"/,
        /\/stories\/([A-Za-z0-9._]{1,30})\//
      ]
      loose = story_patterns.flat_map { |pattern| html.scan(pattern).flatten }
      return loose.map { |u| normalize_username(u) }.reject(&:blank?).uniq.take(12)
    end

    # String#index yields a CHARACTER offset, so the window must be cut with
    # character-based slicing. The previous byteslice call treated the offset as
    # a byte position: with multibyte text before the marker it began too early
    # or in the middle of a character, which made the scans below raise and the
    # method return []. The 800_000 limit therefore now counts characters.
    window = html[start, 800_000]

    # Prefer story-tray item extraction
    tray_usernames = window.scan(/\"user\":\{[\s\S]{0,4000}?\"username\":\"([A-Za-z0-9._]{1,30})\"[\s\S]{0,4000}?\"uuid\":\"/).flatten
    tray_usernames = tray_usernames.map { |u| normalize_username(u) }.reject(&:blank?).uniq
    return tray_usernames unless tray_usernames.empty?

    # Fallback: grab usernames in this payload window
    usernames = window.scan(/\"username\":\"([A-Za-z0-9._]{1,30})\"/).flatten.map { |u| normalize_username(u) }
    usernames.reject(&:blank?).uniq
  rescue StandardError => e
    Rails.logger.error "Story extraction error: #{e.message}" if defined?(Rails)
    []
  end
  # Labels where a username was discovered.
  #
  # Both collections are queried with #key?. Returns "conversation+story" when
  # the username is in both, "story" when only in story_users, and "conversation"
  # otherwise (including when it is in neither).
  def source_for(username, conversation_users, story_users)
    seen_in_conversation = conversation_users.key?(username)
    seen_in_story = story_users.key?(username)

    if seen_in_story
      seen_in_conversation ? "conversation+story" : "story"
    else
      "conversation"
    end
  end
  # Replies to the stories of the first user returned by the story-users API.
  #
  # For each story item of that user the method downloads the media, records a
  # "story_media_downloaded_via_feed" event with the media attached, runs the
  # analysis/suggestion helpers, and posts the first suggestion as a reply (API
  # first, then the story viewer UI). Successful replies are recorded as
  # "story_comment_posted_via_feed" events.
  #
  # driver             - WebDriver session, used for the UI fallback and captures.
  # story_hold_seconds - seconds to pause after each successful reply (coerced
  #                      with #to_i).
  #
  # Returns a Hash with :attempted, :replied, :replied_count, :username,
  # :story_ref and :processed_stories, plus :reply_skipped/:reply_skip_reason
  # when something was skipped. Never raises: a failure on one story is logged
  # and the next story is tried; any other failure is captured via
  # capture_task_html and the partial result is returned.
  def auto_engage_first_story!(driver:, story_hold_seconds:)
    result = { attempted: false, replied: false, replied_count: 0, username: nil, story_ref: nil, processed_stories: 0 }

    username = fetch_story_users_via_api.keys.first.to_s
    if username.blank?
      result[:reply_skipped] = true
      result[:reply_skip_reason] = "api_story_users_unavailable"
      return result
    end

    result[:attempted] = true
    result[:username] = username

    # Only engage with profiles found by find_story_network_profile.
    profile = find_story_network_profile(username: username)
    unless profile
      capture_task_html(
        driver: driver,
        task_name: "auto_engage_story_out_of_network_skipped",
        status: "ok",
        meta: { username: username, reason: "profile_not_in_network" }
      )
      result[:reply_skipped] = true
      result[:reply_skip_reason] = "profile_not_in_network"
      return result
    end

    story_items = fetch_story_items_via_api(username: username)
    if story_items.blank?
      result[:reply_skipped] = true
      result[:reply_skip_reason] = "no_story_items"
      return result
    end

    story_items.each do |story|
      story_id = story[:story_id].to_s
      next if story_id.blank?

      result[:processed_stories] += 1
      story_ref = "#{username}:#{story_id}"
      result[:story_ref] ||= story_ref

      # Skip stories the API payload flagged to be skipped.
      if ActiveModel::Type::Boolean.new.cast(story[:api_should_skip])
        result[:reply_skipped] = true
        result[:reply_skip_reason] = story[:api_external_profile_reason].to_s.presence || "api_external_profile_indicator"
        next
      end

      # Only an explicit false disables replying; nil is treated as "allowed".
      if story[:can_reply] == false
        result[:reply_skipped] = true
        result[:reply_skip_reason] = "api_can_reply_false"
        next
      end

      media_url = story[:media_url].to_s
      next if media_url.blank?

      download = download_media_with_metadata(url: media_url, user_agent: @account.user_agent)
      downloaded_at = Time.current
      downloaded_event = profile.record_event!(
        kind: "story_media_downloaded_via_feed",
        external_id: "story_media_downloaded_via_feed:#{story_ref}:#{downloaded_at.utc.iso8601(6)}",
        occurred_at: downloaded_at,
        metadata: {
          source: "selenium_story_viewer",
          media_source: "api_story_item",
          media_type: story[:media_type],
          username: username,
          story_id: story_id,
          story_ref: story_ref,
          download_link: media_url,
          media_size_bytes: download[:bytes].bytesize,
          content_type: download[:content_type],
          final_url: download[:final_url]
        }
      )
      downloaded_event.media.attach(
        io: StringIO.new(download[:bytes]),
        filename: download[:filename],
        content_type: download[:content_type]
      )
      InstagramProfileEvent.broadcast_story_archive_refresh!(account: @account)

      payload = build_auto_engagement_post_payload(
        profile: profile,
        shortcode: story_ref,
        caption: story[:caption],
        permalink: story[:permalink].to_s.presence || "#{INSTAGRAM_BASE_URL}/stories/#{username}/#{story_id}/",
        include_story_history: true
      )
      analysis = analyze_for_auto_engagement!(
        analyzable: downloaded_event,
        payload: payload,
        bytes: download[:bytes],
        content_type: download[:content_type],
        source_url: media_url
      )
      suggestions = generate_comment_suggestions_from_analysis!(
        profile: profile,
        payload: payload,
        analysis: analysis
      )
      comment_text = suggestions.first.to_s.strip
      next if comment_text.blank?

      # Try the API first; fall back to the story viewer UI when it did not post.
      comment_result = comment_on_story_via_api!(story_id: story_id, story_username: username, comment_text: comment_text)
      unless comment_result[:posted]
        driver.navigate.to("#{INSTAGRAM_BASE_URL}/stories/#{username}/#{story_id}/")
        wait_for(driver, css: "body", timeout: 12)
        dismiss_common_overlays!(driver)
        freeze_story_progress!(driver)
        comment_result = comment_on_story_via_ui!(driver: driver, comment_text: comment_text)
      end

      next unless comment_result[:posted]

      sleep(story_hold_seconds.to_i)
      result[:replied] = true
      result[:replied_count] += 1
      profile.record_event!(
        kind: "story_comment_posted_via_feed",
        external_id: "story_comment_posted_via_feed:#{story_ref}:#{Time.current.utc.iso8601(6)}",
        occurred_at: Time.current,
        metadata: {
          source: "selenium_story_viewer",
          username: username,
          story_id: story_id,
          story_ref: story_ref,
          comment_text: comment_text,
          submission_method: comment_result[:method],
          analysis: analysis
        }
      )
      attach_reply_comment_to_downloaded_event!(downloaded_event: downloaded_event, comment_text: comment_text)
    rescue StandardError => e
      # Best effort per story: keep going, but leave a trace instead of
      # discarding the failure silently.
      Rails.logger.warn "Auto engage story failed (#{username}:#{story_id}): #{e.class}: #{e.message}" if defined?(Rails)
      next
    end

    result
  rescue StandardError => e
    capture_task_html(
      driver: driver,
      task_name: "auto_engage_story_failed",
      status: "error",
      meta: { error_class: e.class.name, error_message: e.message }
    )
    result
  end
  # Downloads a feed post's media, analyzes it, and attempts to comment on the
  # post with the first generated suggestion.
  #
  # driver - WebDriver session, used for the capture and for posting the comment.
  # item   - Hash-like feed item; reads :shortcode, :author_username, :media_url,
  #          :caption and the nested :metadata natural_width/natural_height.
  #
  # Two events are recorded on the author's profile: "feed_post_image_downloaded"
  # (with the media attached) and "feed_post_comment_posted" (written whether or
  # not the comment went through; see its :posted metadata).
  #
  # Returns { shortcode:, username:, comment_posted:, posted_comment: }.
  # Errors are not rescued here.
  def auto_engage_feed_post!(driver:, item:)
    shortcode = item[:shortcode].to_s
    username = normalize_username(item[:author_username].to_s)
    profile = find_or_create_profile_for_auto_engagement!(username: username)
    media_url = item[:media_url]

    capture_task_html(
      driver: driver,
      task_name: "auto_engage_post_selected",
      status: "ok",
      meta: { shortcode: shortcode, username: username, media_url: media_url }
    )

    download = download_media_with_metadata(url: media_url, user_agent: @account.user_agent)
    media_bytes = download[:bytes]
    media_type = download[:content_type]
    downloaded_at = Time.current

    download_metadata = {
      source: "selenium_home_feed",
      shortcode: shortcode,
      download_link: media_url,
      original_image_size_bytes: media_bytes.bytesize,
      original_image_width: item.dig(:metadata, :natural_width),
      original_image_height: item.dig(:metadata, :natural_height),
      content_type: media_type,
      final_url: download[:final_url]
    }
    downloaded_event = profile.record_event!(
      kind: "feed_post_image_downloaded",
      external_id: "feed_post_image_downloaded:#{shortcode}:#{downloaded_at.utc.iso8601(6)}",
      occurred_at: downloaded_at,
      metadata: download_metadata
    )
    downloaded_event.media.attach(
      io: StringIO.new(media_bytes),
      filename: download[:filename],
      content_type: media_type
    )

    payload = build_auto_engagement_post_payload(
      profile: profile,
      shortcode: shortcode,
      caption: item[:caption],
      permalink: "#{INSTAGRAM_BASE_URL}/p/#{shortcode}/",
      include_story_history: false
    )
    analysis = analyze_for_auto_engagement!(
      analyzable: downloaded_event,
      payload: payload,
      bytes: media_bytes,
      content_type: media_type,
      source_url: media_url
    )
    suggestions = generate_comment_suggestions_from_analysis!(
      profile: profile,
      payload: payload,
      analysis: analysis
    )

    comment_text = suggestions.first.to_s.strip
    # No UI interaction happens when there is nothing to post.
    posted = comment_text.present? && comment_on_post_via_ui!(driver: driver, shortcode: shortcode, comment_text: comment_text)

    profile.record_event!(
      kind: "feed_post_comment_posted",
      external_id: "feed_post_comment_posted:#{shortcode}:#{Time.current.utc.iso8601(6)}",
      occurred_at: Time.current,
      metadata: {
        source: "selenium_home_feed",
        shortcode: shortcode,
        username: username,
        posted: posted,
        posted_comment: comment_text,
        generated_suggestions: suggestions.first(8),
        analysis: analysis
      }
    )

    { shortcode: shortcode, username: username, comment_posted: posted, posted_comment: comment_text }
  end
  # Looks up, or creates, the account's profile record for a feed author.
  #
  # username - raw username; normalized before use.
  #
  # A newly created record gets the normalized username as display_name and a
  # nil can_message. Raises RuntimeError when the normalized username is blank.
  def find_or_create_profile_for_auto_engagement!(username:)
    handle = normalize_username(username)
    raise "Feed item username is missing" if handle.blank?

    profiles = @account.instagram_profiles
    profiles.find_or_create_by!(username: handle) do |record|
      record.display_name = handle
      record.can_message = nil
    end
  end
  # Finds the account's profile for a username, restricted to rows where
  # `following` or `follows_you` is true.
  #
  # Returns the first matching record, or nil when the username normalizes to
  # blank, nothing matches, or the lookup raises.
  def find_story_network_profile(username:)
    handle = normalize_username(username)
    return nil if handle.blank?

    scope = @account.instagram_profiles.where(username: handle)
    scope = scope.where("following = ? OR follows_you = ?", true, true)
    scope.first
  rescue StandardError
    nil
  end
  # Finds the account's profile for a username (normalized first).
  #
  # Returns the first matching record, or nil when the username normalizes to
  # blank, nothing matches, or the lookup raises.
  def find_profile_for_interaction(username:)
    handle = normalize_username(username)
    return nil if handle.blank?

    matches = @account.instagram_profiles.where(username: handle)
    matches.first
  rescue StandardError
    nil
  end
  # Tells whether automatic replies are enabled for a profile.
  #
  # Delegates to profile.auto_reply_enabled? when the profile offers it;
  # otherwise checks whether any of the profile's tags carries one of the
  # "automatic reply" spellings.
  def profile_auto_reply_enabled?(profile)
    if profile.respond_to?(:auto_reply_enabled?)
      profile.auto_reply_enabled?
    else
      tag_names = [ "automatic_reply", "automatic reply", "auto_reply", "auto reply" ]
      profile.profile_tags.where(name: tag_names).exists?
    end
  end
  # Reports whether a DOM context says the story viewer is active.
  #
  # Returns false for anything that is not a Hash; for a Hash, returns the value
  # stored under :story_viewer_active as-is (so it may be nil).
  def story_viewer_ready?(dom)
    return false unless dom.is_a?(Hash)

    dom[:story_viewer_active]
  end
  # Picks the element on the home page that should be clicked to open a story.
  #
  # A first script snapshots page state for debugging; a second script collects
  # visible story candidates near the top of the page (skipping "your story",
  # live entries and excluded usernames), sorts them top-to-bottom then
  # left-to-right, and tags the first with data-codex-story-open="1". The tagged
  # element is then resolved through WebDriver.
  #
  # driver             - WebDriver session.
  # excluded_usernames - usernames whose entries must not be chosen.
  #
  # Returns { element:, count:, strategy:, debug: }; element is nil when nothing
  # was chosen or it could not be resolved. The marker attribute is always
  # removed before returning. Errors from the scripts are not rescued here.
  def find_home_story_open_target(driver, excluded_usernames: [])
    # First, try to capture the current page state for debugging
    page_debug = driver.execute_script(<<~JS)
      return {
        url: window.location.href,
        title: document.title,
        storyLinks: document.querySelectorAll("a[href*='/stories/']").length,
        storyButtons: document.querySelectorAll("[aria-label*='story' i]").length,
        allButtons: document.querySelectorAll("button, [role='button']").length,
        allLinks: document.querySelectorAll("a").length,
        bodyText: document.body.innerText.slice(0, 500),
        hasStoryTray: !!document.querySelector('[data-testid*="story"], [class*="story"], [id*="story"]')
      };
    JS

    selection_script = <<~JS
      const excluded = Array.isArray(arguments[0]) ? arguments[0].map((u) => (u || "").toString().toLowerCase()).filter(Boolean) : [];
      const isVisible = (el) => {
        if (!el) return false;
        const s = window.getComputedStyle(el);
        if (!s || s.display === "none" || s.visibility === "hidden" || s.opacity === "0" || s.pointerEvents === "none") return false;
        const r = el.getBoundingClientRect();
        return r.width > 5 && r.height > 5 && r.bottom > 0 && r.right > 0;
      };
      const isExcluded = (text, href) => excluded.some((u) => text.includes(u) || href.includes(`/${u}/`));
      const candidates = [];
      const add = (el, strategy) => {
        if (!el) return;
        try {
          if (!isVisible(el)) return;
          const r = el.getBoundingClientRect();
          const topZone = r.top >= 0 && r.top < Math.max(760, window.innerHeight * 0.85);
          if (!topZone) return;
          const text = (el.getAttribute("aria-label") || el.textContent || "").toLowerCase();
          const href = (el.getAttribute("href") || "").toLowerCase();
          const liveHost = el.closest("a[href*='/live/'], [href*='/live/']");
          if (text.includes("your story")) return;
          if (text.includes("live") || href.includes("/live/") || liveHost) return;
          if (isExcluded(text, href)) return;
          candidates.push({ el, strategy, top: r.top, left: r.left, w: r.width, h: r.height, text: text.slice(0, 50), href: href.slice(0, 50) });
        } catch (e) {
          // Skip problematic elements
        }
      };
      // Aggressive story detection with multiple fallback strategies
      document.querySelectorAll("a[href*='/stories/']").forEach((el) => add(el, "href_story_link"));
      document.querySelectorAll("button[aria-label*='story' i], [role='button'][aria-label*='story' i], a[aria-label*='story' i]").forEach((el) => add(el, "aria_story_button"));
      document.querySelectorAll("[data-testid*='story'], [class*='story'], [id*='story']").forEach((container) => {
        try {
          container.querySelectorAll("a, button, [role='button'], [class*='avatar'], [class*='profile']").forEach((el) => add(el, "container_story_element"));
        } catch (e) {}
      });
      // Ultra-fallback: any clickable element that might be a story
      if (candidates.length === 0) {
        document.querySelectorAll("a[href*='/'], button, [role='button']").forEach((el) => {
          try {
            const text = (el.getAttribute("aria-label") || el.textContent || "").toLowerCase();
            const href = (el.getAttribute("href") || "").toLowerCase();
            if (text.includes("story") || href.includes("story") || (text && text.length > 0 && text.length < 50)) {
              add(el, "ultra_fallback");
            }
          } catch (e) {}
        });
      }
      candidates.sort((a, b) => (a.top - b.top) || (a.left - b.left));
      const chosen = candidates[0];
      if (!chosen) return { found: false, count: 0, strategy: "none", debug: { candidates: candidates.length, totalStoryLinks: document.querySelectorAll("a[href*='/stories/']").length, totalStoryButtons: document.querySelectorAll("[aria-label*='story' i]").length, pageDebug: arguments[1] } };
      try { chosen.el.setAttribute("data-codex-story-open", "1"); } catch (e) {}
      return { found: true, count: candidates.length, strategy: chosen.strategy, debug: { candidates: candidates.length, chosenStrategy: chosen.strategy, chosenText: chosen.text, chosenHref: chosen.href, pageDebug: arguments[1] } };
    JS
    payload = driver.execute_script(selection_script, excluded_usernames, page_debug)

    # Anything other than a Hash means the script produced no usable report.
    return { element: nil, count: 0, strategy: "none", debug: {} } unless payload.is_a?(Hash)

    target = nil
    if payload["found"]
      target =
        begin
          driver.find_element(css: "[data-codex-story-open='1']")
        rescue StandardError
          nil
        end
    end

    {
      element: target,
      count: payload["count"].to_i,
      strategy: payload["strategy"].to_s,
      debug: payload["debug"]
    }
  ensure
    # Always strip the marker so later lookups start from a clean DOM.
    begin
      driver.execute_script("const el=document.querySelector('[data-codex-story-open=\"1\"]'); if (el) el.removeAttribute('data-codex-story-open');")
    rescue StandardError
      nil
    end
  end
  # Gathers everything known about the home-page story carousel in one probe:
  # story anchors in the DOM, the preferred click target, and usernames parsed
  # from the page source.
  #
  # driver             - WebDriver session.
  # excluded_usernames - forwarded to find_home_story_open_target.
  #
  # Returns a Hash with :anchor, :target, :target_count, :target_strategy,
  # :anchor_count, :prefetch_count, :prefetch_usernames, :debug and :page_debug.
  # On any error a zeroed Hash is returned with the message under debug[:error]
  # (that fallback carries no :page_debug key).
  def detect_home_story_carousel_probe(driver, excluded_usernames: [])
    # Force capture page state on every probe for debugging
    page_debug = driver.execute_script(<<~JS)
      return {
        url: window.location.href,
        title: document.title,
        storyLinks: document.querySelectorAll("a[href*='/stories/']").length,
        storyButtons: document.querySelectorAll("[aria-label*='story' i]").length,
        allButtons: document.querySelectorAll("button, [role='button']").length,
        allLinks: document.querySelectorAll("a").length,
        bodyText: document.body.innerText.slice(0, 1000),
        hasStoryTray: !!document.querySelector('[data-testid*="story"], [class*="story"], [id*="story"]'),
        htmlLength: document.documentElement.outerHTML.length,
        readyState: document.readyState,
        visibleElements: Array.from(document.querySelectorAll('*')).filter(el => {
          try {
            const rect = el.getBoundingClientRect();
            return rect.width > 0 && rect.height > 0 && rect.top >= 0 && rect.top < window.innerHeight;
          } catch(e) { return false; }
        }).length
      };
    JS
    # Always capture debug info
    Rails.logger.info "Story carousel probe debug: #{page_debug.inspect}" if defined?(Rails)

    story_anchors = driver.find_elements(css: "a[href*='/stories/']")
    # Prefer an anchor that is displayed; a failing displayed? check counts as
    # "not displayed". Fall back to the first anchor when none qualifies.
    shown_anchor = story_anchors.find do |node|
      begin
        node.displayed?
      rescue StandardError
        false
      end
    end
    shown_anchor ||= story_anchors.first

    target = find_home_story_open_target(driver, excluded_usernames: excluded_usernames)

    html = driver.page_source.to_s
    Rails.logger.info "HTML length: #{html.length}, contains stories pattern: #{html.include?('stories')}" if defined?(Rails)
    prefetch_users = extract_story_users_from_home_html(html)

    result = {
      anchor: shown_anchor,
      target: target[:element],
      target_count: target[:count].to_i,
      target_strategy: target[:strategy].to_s.presence || "none",
      anchor_count: story_anchors.length,
      prefetch_count: prefetch_users.length,
      prefetch_usernames: prefetch_users.take(12),
      debug: target[:debug] || {},
      page_debug: page_debug
    }
    Rails.logger.info "Carousel probe result: #{result.inspect}" if defined?(Rails)
    result
  rescue StandardError => e
    Rails.logger.error "Carousel probe error: #{e.message}" if defined?(Rails)
    { anchor: nil, target: nil, target_count: 0, target_strategy: "none", anchor_count: 0, prefetch_count: 0, prefetch_usernames: [], debug: { error: e.message } }
  end
  # Chooses and clicks a story entry entirely inside the page, without resolving
  # the element through WebDriver.
  #
  # The script gathers visible story links/buttons near the top of the page
  # (skipping "your story", live entries and excluded usernames), picks the
  # top-most/left-most one and dispatches pointer and mouse events followed by
  # el.click().
  #
  # driver             - WebDriver session.
  # excluded_usernames - usernames whose entries must not be chosen.
  #
  # Returns { clicked:, count:, strategy: }; any error yields
  # { clicked: false, count: 0, strategy: "none" }.
  def click_home_story_open_target_via_js(driver, excluded_usernames: [])
    click_script = <<~JS
      const excluded = Array.isArray(arguments[0]) ? arguments[0].map((u) => (u || "").toString().toLowerCase()).filter(Boolean) : [];
      const isVisible = (el) => {
        if (!el) return false;
        const s = window.getComputedStyle(el);
        if (!s || s.display === "none" || s.visibility === "hidden" || s.pointerEvents === "none") return false;
        const r = el.getBoundingClientRect();
        return r.width > 18 && r.height > 18 && r.bottom > 0 && r.right > 0;
      };
      const isExcluded = (text, href) => excluded.some((u) => text.includes(u) || href.includes(`/${u}/`));
      const clickEl = (el) => {
        try { el.scrollIntoView({ block: "center", inline: "center" }); } catch (e) {}
        const evt = { view: window, bubbles: true, cancelable: true, composed: true, button: 0 };
        ["pointerdown", "mousedown", "mouseup", "click"].forEach((type) => {
          try { el.dispatchEvent(new MouseEvent(type, evt)); } catch (e) {}
        });
        try { el.click(); } catch (e) {}
        return true;
      };
      const candidates = [];
      const add = (el, strategy) => {
        if (!isVisible(el)) return;
        const r = el.getBoundingClientRect();
        const topZone = r.top >= 0 && r.top < Math.max(760, window.innerHeight * 0.85);
        if (!topZone) return;
        const text = (el.getAttribute("aria-label") || el.textContent || "").toLowerCase();
        const href = (el.getAttribute("href") || "").toLowerCase();
        const liveHost = el.closest("a[href*='/live/'], [href*='/live/']");
        if (text.includes("your story")) return;
        if (text.includes("live") || href.includes("/live/") || liveHost) return;
        if (isExcluded(text, href)) return;
        candidates.push({ el, strategy, top: r.top, left: r.left });
      };
      document.querySelectorAll("a[href*='/stories/']").forEach((el) => add(el, "href_story_link"));
      document.querySelectorAll("button[aria-label*='story' i], [role='button'][aria-label*='story' i], a[aria-label*='story' i]").forEach((el) => add(el, "aria_story_button"));
      candidates.sort((a, b) => (a.top - b.top) || (a.left - b.left));
      const chosen = candidates[0];
      if (!chosen) return { clicked: false, count: 0, strategy: "none" };
      clickEl(chosen.el);
      return { clicked: true, count: candidates.length, strategy: chosen.strategy };
    JS
    report = driver.execute_script(click_script, excluded_usernames)

    # A non-Hash result is treated the same as "nothing clicked".
    return { clicked: false, count: 0, strategy: "none" } unless report.is_a?(Hash)

    {
      clicked: report["clicked"] == true,
      count: report["count"].to_i,
      strategy: report["strategy"].to_s
    }
  rescue StandardError
    { clicked: false, count: 0, strategy: "none" }
  end
  # Tries to open a story by navigating straight to /stories/<username>/ for
  # usernames parsed from the page source.
  #
  # driver    - WebDriver session.
  # usernames - candidate usernames; normalized, de-duplicated, first 8 used.
  # attempts  - caller's attempt counter (recorded in capture metadata only).
  # probe     - probe Hash; :target_count, :anchor_count and :prefetch_count are
  #             recorded in capture metadata.
  #
  # Each route gets up to four readiness checks 0.6s apart. Returns true as soon
  # as a story viewer is detected (after writing an "ok" capture). Returns false
  # when there are no candidates, or after every route failed (an "error"
  # capture is written in that case). Errors on one route do not stop the others.
  def open_story_from_prefetch_usernames(driver:, usernames:, attempts:, probe:)
    routes = Array(usernames).map { |name| normalize_username(name) }.reject(&:blank?).uniq.take(8)
    return false if routes.empty?

    routes.each_with_index do |handle, position|
      driver.navigate.to("#{INSTAGRAM_BASE_URL}/stories/#{handle}/")
      wait_for(driver, css: "body", timeout: 12)

      viewer_opened = 4.times.any? do
        sleep(0.6)
        story_viewer_ready?(extract_story_dom_context(driver))
      end
      next unless viewer_opened

      capture_task_html(
        driver: driver,
        task_name: "home_story_sync_first_story_opened_prefetch_route",
        status: "ok",
        meta: {
          strategy: "prefetch_username_route",
          username: handle,
          candidate_index: position,
          candidate_count: routes.length,
          attempts: attempts,
          target_count: probe[:target_count],
          anchor_count: probe[:anchor_count],
          prefetch_story_usernames: probe[:prefetch_count]
        }
      )
      return true
    rescue StandardError
      # Best effort: a failing route simply moves on to the next username.
      nil
    end

    capture_task_html(
      driver: driver,
      task_name: "home_story_sync_first_story_opened_prefetch_route",
      status: "error",
      meta: {
        strategy: "prefetch_username_route",
        attempts: attempts,
        target_count: probe[:target_count],
        anchor_count: probe[:anchor_count],
        prefetch_story_usernames: probe[:prefetch_count],
        usernames_tried: routes
      }
    )
    false
  end
  # Opens the first story shown in the home-feed story tray.
  #
  # Loops for up to 45 seconds. Every pass dismisses overlays, checks that the
  # browser is still on Instagram, probes the tray with
  # detect_home_story_carousel_probe, and then tries in turn:
  #   1. a native click on the probed element (JS click as backup),
  #   2. an in-page JS click on a freshly resolved element,
  #   3. direct navigation through the probe's prefetched usernames (at most once).
  # Diagnostic snapshots are written through capture_task_html along the way.
  #
  # Returns true as soon as story_viewer_ready? accepts the page; raises a
  # RuntimeError summarising the last probe when the deadline passes.
  def open_first_story_from_home_carousel!(driver:)
    began_at = Time.current
    give_up_at = began_at + 45.seconds # Further increased timeout
    attempts = 0
    last_probe = {}
    prefetch_route_attempted = false
    excluded_usernames = []
    # True when a probe reports no targets, no anchors and no prefetched usernames.
    tray_empty = lambda do |snapshot|
      %i[target_count anchor_count prefetch_count].all? { |key| snapshot[key].to_i.zero? }
    end

    while Time.current < give_up_at
      attempts += 1
      dismiss_common_overlays!(driver)

      # Force scroll to ensure stories are loaded
      if attempts == 1
        begin
          driver.execute_script("window.scrollTo(0, 0);")
          sleep(1.0)
        rescue StandardError
          nil
        end
      end

      # Check if we're on the right page
      current_url = driver.current_url.to_s
      on_instagram = current_url.include?("instagram.com") || current_url.include?(INSTAGRAM_BASE_URL)
      unless on_instagram
        Rails.logger.warn "Not on Instagram page, redirecting. Current URL: #{current_url}" if defined?(Rails)
        begin
          driver.navigate.to(INSTAGRAM_BASE_URL)
          wait_for(driver, css: "body", timeout: 12)
          dismiss_common_overlays!(driver)
          sleep(2.0)
        rescue StandardError => e
          Rails.logger.error "Failed to redirect to Instagram: #{e.message}" if defined?(Rails)
        end
        # Start the next pass whether or not the redirect succeeded.
        next
      end

      probe = detect_home_story_carousel_probe(driver, excluded_usernames: excluded_usernames)
      last_probe = probe

      # Enhanced debugging for failed story detection
      if attempts == 1 || (attempts % 3).zero? || tray_empty.call(probe)
        capture_task_html(
          driver: driver,
          task_name: "home_story_sync_debug_probe",
          status: "ok",
          meta: {
            attempts: attempts,
            target_count: probe[:target_count],
            anchor_count: probe[:anchor_count],
            prefetch_count: probe[:prefetch_count],
            target_strategy: probe[:target_strategy],
            debug_info: probe[:debug],
            page_debug: probe[:page_debug],
            current_url: current_url,
            all_zero: tray_empty.call(probe)
          }
        )
      end

      # Aggressive prefetch route attempt when no elements found
      missing_clickables = probe[:anchor_count].to_i.zero? || probe[:target_count].to_i.zero?
      if !prefetch_route_attempted && attempts >= 2 && missing_clickables && Array(probe[:prefetch_usernames]).present?
        prefetch_route_attempted = true
        return true if open_story_from_prefetch_usernames(
          driver: driver,
          usernames: Array(probe[:prefetch_usernames]),
          attempts: attempts,
          probe: probe
        )
      end

      # Try direct navigation if no stories found after multiple attempts
      if attempts >= 6 && tray_empty.call(probe)
        # Try to navigate to stories directly as last resort
        begin
          Rails.logger.info "No stories found, attempting refresh and retry" if defined?(Rails)
          driver.navigate.to("#{INSTAGRAM_BASE_URL}/")
          wait_for(driver, css: "body", timeout: 12)
          dismiss_common_overlays!(driver)
          sleep(2.0)
          next
        rescue StandardError
          nil
        end
      end

      story_el = probe[:target]
      if story_el
        # Native click first; fall back to a JS click on the same handle.
        clicked_target =
          begin
            driver.action.move_to(story_el).click.perform
            true
          rescue StandardError
            begin
              js_click(driver, story_el)
              true
            rescue StandardError
              false
            end
          end

        if clicked_target
          sleep(0.8)
          viewer_dom = extract_story_dom_context(driver)
          if story_viewer_ready?(viewer_dom)
            capture_task_html(
              driver: driver,
              task_name: "home_story_sync_first_story_opened",
              status: "ok",
              meta: {
                strategy: probe[:target_strategy],
                attempts: attempts,
                target_count: probe[:target_count],
                anchor_count: probe[:anchor_count],
                prefetch_story_usernames: probe[:prefetch_count],
                debug_info: probe[:debug],
                page_debug: probe[:page_debug]
              }
            )
            return true
          end

          # The click landed somewhere else; remember live broadcasts so the
          # next probe can exclude that username.
          current_url = driver.current_url.to_s
          if current_url.include?("/live/")
            live_username = extract_username_from_profile_like_path(current_url)
            excluded_usernames << live_username if live_username.present? && !excluded_usernames.include?(live_username)
          end
          capture_task_html(
            driver: driver,
            task_name: "home_story_sync_first_story_opened",
            status: "error",
            meta: {
              strategy: probe[:target_strategy],
              attempts: attempts,
              target_count: probe[:target_count],
              anchor_count: probe[:anchor_count],
              prefetch_story_usernames: probe[:prefetch_count],
              reason: "clicked_target_but_story_frame_not_detected",
              current_url: current_url,
              excluded_usernames: excluded_usernames,
              story_viewer_active: viewer_dom[:story_viewer_active],
              story_frame_present: viewer_dom[:story_frame_present],
              media_signature: viewer_dom[:media_signature].to_s.byteslice(0, 120),
              debug_info: probe[:debug],
              page_debug: probe[:page_debug]
            }
          )
          begin
            driver.navigate.to(INSTAGRAM_BASE_URL)
            wait_for(driver, css: "body", timeout: 12)
          rescue StandardError
            nil
          end
          next
        end
      end

      # Some IG builds rerender story nodes and invalidate Selenium element handles between probe and click.
      # When we have candidates but no stable handle, click directly in page JS as a fallback.
      if probe[:target_count].to_i.positive?
        js_click_result = click_home_story_open_target_via_js(driver, excluded_usernames: excluded_usernames)
        if js_click_result[:clicked]
          sleep(0.8)
          viewer_dom = extract_story_dom_context(driver)
          if story_viewer_ready?(viewer_dom)
            capture_task_html(
              driver: driver,
              task_name: "home_story_sync_first_story_opened_js_fallback",
              status: "ok",
              meta: {
                strategy: js_click_result[:strategy],
                attempts: attempts,
                target_count: js_click_result[:count],
                anchor_count: probe[:anchor_count],
                prefetch_story_usernames: probe[:prefetch_count],
                excluded_usernames: excluded_usernames,
                debug_info: probe[:debug],
                page_debug: probe[:page_debug]
              }
            )
            return true
          end
        end
      end

      # If no clickable tray anchors exist, open story route directly from prefetch usernames.
      if !prefetch_route_attempted && attempts >= 3 && Array(probe[:prefetch_usernames]).present?
        prefetch_route_attempted = true
        return true if open_story_from_prefetch_usernames(
          driver: driver,
          usernames: Array(probe[:prefetch_usernames]),
          attempts: attempts,
          probe: probe
        )
      end

      sleep(1.0)

      # Story tray hydration can stall on initial render; one soft refresh helps recover.
      if [8, 15].include?(attempts)
        begin
          driver.navigate.refresh
          wait_for(driver, css: "body", timeout: 12)
        rescue StandardError
          nil
        end
      end
    end

    capture_task_html(
      driver: driver,
      task_name: "home_story_sync_no_carousel_found",
      status: "error",
      meta: {
        attempts: attempts,
        elapsed_seconds: (Time.current - began_at).round(2),
        target_count: last_probe[:target_count],
        anchor_count: last_probe[:anchor_count],
        prefetch_story_usernames: last_probe[:prefetch_count],
        target_strategy: last_probe[:target_strategy],
        debug_info: last_probe[:debug],
        page_debug: last_probe[:page_debug],
        current_url: driver.current_url.to_s,
        page_title: begin
          driver.execute_script("return document.title;")
        rescue StandardError
          "unknown"
        end
      }
    )
    raise "No clickable active stories found in the home carousel after waiting #{(Time.current - began_at).round(1)}s (targets=#{last_probe[:target_count].to_i}, anchors=#{last_probe[:anchor_count].to_i}, prefetch=#{last_probe[:prefetch_count].to_i}, strategy=#{last_probe[:target_strategy]})"
  end
  # Describes the story currently shown by the driver.
  #
  # The "username:story_id" reference is taken from the browser URL, then from
  # the page's og:url, and finally rebuilt from a profile-like path (username
  # only). When the viewer is flagged active but no story frame is present, the
  # reference and story id are cleared so the page is not treated as a story.
  #
  # Returns a Hash with :ref, :username, :story_id, :url,
  # :story_url_recovery_needed, :story_viewer_active, :story_key and
  # :media_signature.
  def current_story_context(driver)
    # Splits "user:id" into [user, id]; missing parts become "".
    split_ref = lambda do |value|
      parts = value.to_s.split(":")
      [parts.first.to_s, parts[1].to_s]
    end

    url = driver.current_url.to_s
    ref = current_story_reference(url)
    username, story_id = split_ref.call(ref)
    dom = extract_story_dom_context(driver)
    viewer_without_frame = dom[:story_viewer_active] && !dom[:story_frame_present]

    if ref.blank? && dom[:og_story_url].present?
      ref = current_story_reference(dom[:og_story_url])
      og_username, og_story_id = split_ref.call(ref)
      username = og_username if username.blank?
      story_id = og_story_id if story_id.blank?
    end

    recovery_needed = false
    if ref.blank?
      fallback_username = extract_username_from_profile_like_path(url)
      if fallback_username.present?
        username = fallback_username
        ref = "#{fallback_username}:#{story_id.presence || 'unknown'}"
        recovery_needed = viewer_without_frame
      end
    end

    if viewer_without_frame
      # Do not treat profile-preview-like pages as valid story context.
      ref = ""
      story_id = ""
    end

    meta_username = dom[:meta_username]
    username = meta_username.to_s if username.blank? && meta_username.present?
    media_signature = dom[:media_signature].to_s

    story_key =
      if username.present? && story_id.present?
        "#{username}:#{story_id}"
      elsif username.present? && media_signature.present?
        "#{username}:sig:#{media_signature}"
      else
        ref
      end

    {
      ref: ref,
      username: normalize_username(username),
      story_id: story_id,
      url: url,
      story_url_recovery_needed: recovery_needed,
      story_viewer_active: dom[:story_viewer_active],
      story_key: story_key,
      media_signature: media_signature
    }
  end
  # Returns a copy of +context+ aligned with what the browser shows right now.
  #
  # When the live URL yields a story reference, its username/story id override
  # the stored ones. Username and story id are then normalised, :ref and
  # :story_key are rebuilt when both are present, and :url is replaced by the
  # canonical story URL. On any StandardError the untouched +context+ argument
  # is returned instead.
  def normalized_story_context_for_processing(driver:, context:)
    ctx = context.is_a?(Hash) ? context.dup : {}
    live_url = driver.current_url.to_s
    live_ref = current_story_reference(live_url)

    if live_ref.present?
      name_part, id_part = live_ref.to_s.split(":")
      live_username = normalize_username(name_part.to_s)
      live_story_id = normalize_story_id_token(id_part.to_s)
      ctx[:ref] = live_ref
      ctx[:username] = live_username if live_username.present?
      ctx[:story_id] = live_story_id if live_story_id.present?
    end

    ctx[:username] = normalize_username(ctx[:username])
    ctx[:story_id] = normalize_story_id_token(ctx[:story_id])

    if ctx[:username].present? && ctx[:story_id].present?
      ctx[:ref] = "#{ctx[:username]}:#{ctx[:story_id]}"
      ctx[:story_key] = "#{ctx[:username]}:#{ctx[:story_id]}"
    end

    ctx[:url] = canonical_story_url(username: ctx[:username], story_id: ctx[:story_id], fallback_url: live_url)
    ctx
  rescue StandardError
    context
  end
  # Re-enters the story viewer for +username+ by navigating to the user's
  # "/stories/<username>/" route, then dismisses overlays, freezes story
  # progress and records a snapshot tagged with +reason+.
  #
  # Does nothing when the username normalises to blank. A StandardError raised
  # during recovery is reported through a failure snapshot instead of being
  # re-raised.
  def recover_story_url_context!(driver:, username:, reason:)
    clean_username = normalize_username(username)
    return if clean_username.blank?

    story_root = "#{INSTAGRAM_BASE_URL}/stories/#{clean_username}/"
    driver.navigate.to(story_root)
    wait_for(driver, css: "body", timeout: 12)
    dismiss_common_overlays!(driver)
    freeze_story_progress!(driver)

    recovered_meta = {
      reason: reason,
      username: clean_username,
      current_url: driver.current_url.to_s
    }
    capture_task_html(
      driver: driver,
      task_name: "home_story_sync_story_context_recovered",
      status: "ok",
      meta: recovered_meta
    )
  rescue StandardError => e
    failure_meta = {
      reason: reason,
      username: clean_username,
      error_class: e.class.name,
      error_message: e.message
    }
    capture_task_html(
      driver: driver,
      task_name: "home_story_sync_story_context_recovery_failed",
      status: "error",
      meta: failure_meta
    )
  end
  # Advances the story viewer by one story.
  #
  # Uses the "next" control located by find_story_next_button (native click,
  # then JS click, then the right-arrow key); when no control is found it sends
  # the right-arrow key directly. The temporary marker attribute placed on the
  # control is always removed afterwards.
  #
  # Returns true when either the story reference in the URL or the visible
  # media signature changed; false otherwise, including when a StandardError
  # was raised (which is recorded in a snapshot).
  def click_next_story_in_carousel!(driver:, current_ref:)
    signature_before = visible_story_media_signature(driver)
    marker = find_story_next_button(driver)
    capture_task_html(
      driver: driver,
      task_name: "home_story_sync_next_button_probe",
      status: marker[:found] ? "ok" : "error",
      meta: {
        current_ref: current_ref,
        next_found: marker[:found],
        selector: marker[:selector],
        aria_label: marker[:aria_label],
        outer_html_preview: marker[:outer_html_preview]
      }
    )

    press_right_arrow = -> { driver.action.send_keys(:arrow_right).perform }
    marked_css = "[data-codex-story-next='1']"

    if marker[:found]
      begin
        driver.action.move_to(driver.find_element(css: marked_css)).click.perform
      rescue StandardError
        begin
          js_click(driver, driver.find_element(css: marked_css))
        rescue StandardError
          press_right_arrow.call
        end
      ensure
        # Best-effort cleanup of the marker attribute set by the probe script.
        begin
          driver.execute_script("const el=document.querySelector('[data-codex-story-next=\"1\"]'); if (el) el.removeAttribute('data-codex-story-next');")
        rescue StandardError
          nil
        end
      end
    else
      press_right_arrow.call
    end

    sleep(1.0)
    new_ref = current_story_reference(driver.current_url.to_s)
    signature_after = visible_story_media_signature(driver)
    ref_changed = new_ref.present? && new_ref != current_ref
    media_changed = signature_after.present? && signature_before.present? && signature_after != signature_before
    moved = ref_changed || media_changed

    capture_task_html(
      driver: driver,
      task_name: "home_story_sync_after_next_click",
      status: moved ? "ok" : "error",
      meta: {
        previous_ref: current_ref,
        new_ref: new_ref,
        previous_signature: signature_before.to_s.byteslice(0, 120),
        new_signature: signature_after.to_s.byteslice(0, 120),
        moved: moved
      }
    )
    moved
  rescue StandardError => e
    capture_task_html(
      driver: driver,
      task_name: "home_story_sync_next_click_error",
      status: "error",
      meta: { previous_ref: current_ref, error_class: e.class.name, error_message: e.message }
    )
    false
  end
  # Returns a short fingerprint of the media currently visible on the page:
  # the document title joined with the source of the first visible <img>/<video>
  # larger than 120x120 px, truncated to 400 characters by the page script.
  # Returns "" when the script result is not a Hash or a StandardError occurs.
  def visible_story_media_signature(driver)
    result = driver.execute_script(<<~JS)
      const out = { media_signature: "", title: (document.title || "").toString() };
      const visible = (el) => {
        if (!el) return false;
        const style = window.getComputedStyle(el);
        if (!style || style.display === "none" || style.visibility === "hidden" || style.opacity === "0") return false;
        const r = el.getBoundingClientRect();
        return r.width > 120 && r.height > 120;
      };
      const mediaEl = Array.from(document.querySelectorAll("img,video")).find((el) => visible(el));
      const src = mediaEl ? (mediaEl.currentSrc || mediaEl.src || mediaEl.getAttribute("src") || "") : "";
      out.media_signature = [out.title, src].filter(Boolean).join("|").slice(0, 400);
      return out;
    JS
    return "" unless result.is_a?(Hash)

    result["media_signature"].to_s
  rescue StandardError
    ""
  end
  # Collects story-related signals from the current page via an in-page script.
  #
  # Returns a Hash with:
  #   :og_story_url        - og:url content when it contains "/stories/", else ""
  #   :meta_username       - username parsed from :og_story_url, else ""
  #   :story_viewer_active - true when the path, the title or og:url hints at a story
  #   :story_frame_present - true when the first visible <img>/<video> is at least 220x220 px
  #   :media_signature     - title joined with that media element's source (max 400 chars)
  #
  # The same fully keyed default (blank strings, false flags) is returned both
  # when the script result is not a Hash and when a StandardError is raised, so
  # callers always see every key.
  def extract_story_dom_context(driver)
    fallback = {
      og_story_url: "",
      meta_username: "",
      story_viewer_active: false,
      story_frame_present: false,
      media_signature: ""
    }

    payload = driver.execute_script(<<~JS)
      const out = {
        og_story_url: "",
        meta_username: "",
        story_viewer_active: false,
        story_frame_present: false,
        media_signature: ""
      };
      const og = document.querySelector("meta[property='og:url']");
      const ogUrl = (og && og.content) ? og.content.toString() : "";
      if (ogUrl.includes("/stories/")) out.og_story_url = ogUrl;
      const path = window.location.pathname || "";
      if (path.includes("/stories/")) out.story_viewer_active = true;
      if ((document.title || "").toLowerCase().includes("story")) out.story_viewer_active = true;
      if (out.og_story_url) out.story_viewer_active = true;
      const match = out.og_story_url.match(/\\/stories\\/([A-Za-z0-9._]{1,30})/);
      if (match && match[1]) out.meta_username = match[1];
      const visible = (el) => {
        if (!el) return false;
        const style = window.getComputedStyle(el);
        if (!style || style.display === "none" || style.visibility === "hidden" || style.opacity === "0") return false;
        const r = el.getBoundingClientRect();
        return r.width > 120 && r.height > 120;
      };
      const mediaEl = Array.from(document.querySelectorAll("img,video")).find((el) => visible(el));
      const src = mediaEl ? (mediaEl.currentSrc || mediaEl.src || mediaEl.getAttribute("src") || "") : "";
      const rect = mediaEl ? mediaEl.getBoundingClientRect() : { width: 0, height: 0 };
      out.story_frame_present = Boolean(mediaEl && rect.width >= 220 && rect.height >= 220);
      out.media_signature = [document.title || "", src].filter(Boolean).join("|").slice(0, 400);
      return out;
    JS
    # Previously this path returned {} while the rescue path returned the keyed default.
    return fallback unless payload.is_a?(Hash)

    boolean = ActiveModel::Type::Boolean.new
    {
      og_story_url: payload["og_story_url"].to_s,
      meta_username: payload["meta_username"].to_s,
      story_viewer_active: boolean.cast(payload["story_viewer_active"]),
      story_frame_present: boolean.cast(payload["story_frame_present"]),
      media_signature: payload["media_signature"].to_s
    }
  rescue StandardError
    fallback
  end
  # Locates the story viewer's "next" control with an in-page script.
  #
  # The script walks a fixed list of selectors, picks the first visible match
  # (for an <svg> hit, its closest button-like ancestor) and tags it with
  # data-codex-story-next="1" so it can be found again by CSS afterwards.
  #
  # Returns a Hash with :found (Boolean), :selector, :aria_label and
  # :outer_html_preview (each nil when blank). The not-found Hash is also
  # returned for non-Hash script results and on StandardError.
  def find_story_next_button(driver)
    not_found = { found: false, selector: nil, aria_label: nil, outer_html_preview: nil }

    payload = driver.execute_script(<<~JS)
      const isVisible = (el) => {
        if (!el) return false;
        const s = window.getComputedStyle(el);
        if (!s || s.display === "none" || s.visibility === "hidden" || s.opacity === "0") return false;
        const r = el.getBoundingClientRect();
        return r.width > 6 && r.height > 6;
      };
      const candidates = [
        { sel: "button[aria-label='Next']", label: "button[aria-label='Next']" },
        { sel: "button[aria-label='Next story']", label: "button[aria-label='Next story']" },
        { sel: "[role='button'][aria-label='Next']", label: "[role='button'][aria-label='Next']" },
        { sel: "[role='button'][aria-label*='Next']", label: "[role='button'][aria-label*='Next']" },
        { sel: "svg[aria-label='Next']", label: "svg[aria-label='Next']" },
        { sel: "svg[aria-label*='Next']", label: "svg[aria-label*='Next']" }
      ];
      for (const c of candidates) {
        const nodes = Array.from(document.querySelectorAll(c.sel));
        const hit = nodes.find((n) => {
          const target = (n.tagName && n.tagName.toLowerCase() === "svg") ? (n.closest("button,[role='button']") || n) : n;
          return isVisible(target);
        });
        if (hit) {
          const target = (hit.tagName && hit.tagName.toLowerCase() === "svg") ? (hit.closest("button,[role='button']") || hit) : hit;
          try { target.setAttribute("data-codex-story-next", "1"); } catch (e) {}
          return {
            found: true,
            selector: c.label,
            aria_label: target.getAttribute("aria-label") || "",
            outer_html_preview: (target.outerHTML || "").slice(0, 800)
          };
        }
      }
      return { found: false, selector: "", aria_label: "", outer_html_preview: "" };
    JS
    return not_found unless payload.is_a?(Hash)

    selector, aria_label, preview = payload.values_at("selector", "aria_label", "outer_html_preview")
    {
      found: payload["found"] == true,
      selector: selector.to_s.presence,
      aria_label: aria_label.to_s.presence,
      outer_html_preview: preview.to_s.presence
    }
  rescue StandardError
    { found: false, selector: nil, aria_label: nil, outer_html_preview: nil }
  end
  # Downloads a media file over HTTP(S) and returns it with basic metadata.
  #
  # url            - absolute http/https URL of the media
  # user_agent     - User-Agent header value ("Mozilla/5.0" when blank)
  # redirect_limit - how many redirects may still be followed (default 3)
  #
  # Returns a Hash with :bytes, :content_type (defaults to "image/jpeg" when the
  # response has none), :filename and :final_url.
  #
  # Raises RuntimeError for a non-HTTP(S) or host-less URL, a non-success
  # response, or an empty body. URI parsing and network errors propagate.
  def download_media_with_metadata(url:, user_agent:, redirect_limit: 3)
    uri = URI.parse(url.to_s)
    # URI::HTTPS is a subclass of URI::HTTP, so one check covers both schemes.
    # A host-less URI such as "http:/x" parses as HTTP and must be rejected too.
    raise "Invalid media URL" unless uri.is_a?(URI::HTTP) && !uri.host.to_s.empty?

    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = (uri.scheme == "https")
    http.open_timeout = 10
    http.read_timeout = 30

    req = Net::HTTP::Get.new(uri.request_uri)
    req["User-Agent"] = user_agent.presence || "Mozilla/5.0"
    req["Accept"] = "*/*"
    req["Referer"] = INSTAGRAM_BASE_URL
    res = http.request(req)

    if res.is_a?(Net::HTTPRedirection) && res["location"].present? && redirect_limit.to_i.positive?
      # Location may be relative; resolve it against the URL that was requested.
      next_url = URI.join(uri.to_s, res["location"].to_s).to_s
      return download_media_with_metadata(url: next_url, user_agent: user_agent, redirect_limit: redirect_limit.to_i - 1)
    end
    raise "Media download failed: HTTP #{res.code}" unless res.is_a?(Net::HTTPSuccess)

    body = res.body.to_s
    raise "Downloaded media is empty" if body.blank?

    content_type = res["content-type"].to_s.split(";").first.presence || "image/jpeg"
    digest = Digest::SHA256.hexdigest("#{uri.path}-#{body.bytesize}")[0, 12]
    {
      bytes: body,
      content_type: content_type,
      filename: "feed_media_#{digest}.#{extension_for_content_type(content_type: content_type)}",
      final_url: uri.to_s
    }
  end
  # Maps a MIME content type to a file extension.
  #
  # Matching is by substring and case-insensitive, so parameters or vendor
  # prefixes do not matter. The first matching rule wins (jpeg/jpg, png, webp,
  # gif, mp4, quicktime). nil or unknown types yield "bin".
  def extension_for_content_type(content_type:)
    normalized = content_type.to_s.downcase
    rules = [
      %w[jpeg jpg],
      %w[jpg jpg], # non-standard but common alias for image/jpeg
      %w[png png],
      %w[webp webp],
      %w[gif gif],
      %w[mp4 mp4],
      %w[quicktime mov]
    ]
    matched = rules.find { |needle, _extension| normalized.include?(needle) }
    matched ? matched.last : "bin"
  end
  # Assembles the analysis payload for one post/story of +profile+.
  #
  # The result has three sections: :post (shortcode, caption, permalink and
  # unset counters), :author_profile (fields read from +profile+ plus its sorted
  # tag names) and :rules (fixed style settings plus history context).
  # Engagement history is only loaded when +include_story_history+ is true;
  # otherwise an empty Hash is used.
  def build_auto_engagement_post_payload(profile:, shortcode:, caption:, permalink:, include_story_history: false)
    engagement_history = include_story_history ? recent_story_and_post_history(profile: profile) : {}
    narrative_text = profile.history_narrative_text(max_chunks: 3)
    narrative_chunks = profile.history_narrative_chunks(max_chunks: 6)

    post_section = {
      shortcode: shortcode,
      caption: caption.to_s.presence,
      taken_at: nil,
      permalink: permalink,
      likes_count: nil,
      comments_count: nil,
      comments: []
    }
    author_section = {
      username: profile.username,
      display_name: profile.display_name,
      bio: profile.bio,
      can_message: profile.can_message,
      tags: profile.profile_tags.pluck(:name).sort
    }
    rules_section = {
      require_manual_review: false,
      style: "gen_z_light",
      diversity_requirement: "Avoid repeating prior story comments; generate novel phrasing.",
      engagement_history: engagement_history,
      historical_narrative_text: narrative_text,
      historical_narrative_chunks: narrative_chunks
    }

    { post: post_section, author_profile: author_section, rules: rules_section }
  end
  # Runs the AI "post" analysis for +analyzable+ using the given payload and
  # media bytes (fingerprinted with SHA-256).
  #
  # Returns the analysis Hash found under result -> analysis of the run, or {}
  # when that value is not a Hash or any StandardError is raised.
  def analyze_for_auto_engagement!(analyzable:, payload:, bytes:, content_type:, source_url:)
    media_payload = build_auto_engagement_media_payload(bytes: bytes, content_type: content_type, source_url: source_url)
    runner = Ai::Runner.new(account: @account)
    run = runner.analyze!(
      purpose: "post",
      analyzable: analyzable,
      payload: payload,
      media: media_payload,
      media_fingerprint: Digest::SHA256.hexdigest(bytes)
    )
    analysis = run.dig(:result, :analysis)
    return {} unless analysis.is_a?(Hash)

    analysis
  rescue StandardError
    {}
  end
  # Builds the media section handed to the AI runner.
  #
  # bytes            - raw media bytes (String)
  # content_type     - MIME type, also used in the data URL
  # source_url       - where the media came from (stringified)
  # max_inline_bytes - largest payload embedded as a base64 data URL
  #                    (default 2 MiB, the previously hard-coded limit)
  #
  # Returns a Hash with :type, :content_type, :bytes and :url, plus
  # :image_data_url when the media is within +max_inline_bytes+.
  def build_auto_engagement_media_payload(bytes:, content_type:, source_url:, max_inline_bytes: 2 * 1024 * 1024)
    media = {
      type: "image",
      content_type: content_type,
      bytes: bytes,
      url: source_url.to_s
    }
    return media if bytes.bytesize > max_inline_bytes

    media[:image_data_url] = "data:#{content_type};base64,#{Base64.strict_encode64(bytes)}"
    media
  end
  # Produces comment suggestions for +profile+ from an analysis result.
  #
  # First checks profile readiness; when the profile is not ready, a warning
  # event is logged and [] is returned. Otherwise the analysis' own
  # "comment_suggestions" are cleaned and passed through the diversity filter;
  # when nothing remains, comments are generated from the analysis description
  # and topics and filtered the same way.
  def generate_comment_suggestions_from_analysis!(profile:, payload:, analysis:)
    preparation = ensure_profile_comment_generation_readiness(profile: profile)
    # The preparation summary may use symbol or string keys.
    prep_value = ->(key) { preparation[key] || preparation[key.to_s] }

    ready = ActiveModel::Type::Boolean.new.cast(prep_value.call(:ready_for_comment_generation))
    unless ready
      log_automation_event(
        task_name: "comment_generation_blocked_profile_preparation",
        severity: "warn",
        details: {
          profile_id: profile&.id,
          username: profile&.username,
          reason_code: prep_value.call(:reason_code),
          reason: prep_value.call(:reason)
        }
      )
      return []
    end

    from_analysis = Array(analysis["comment_suggestions"]).map { |item| item.to_s.strip }.reject(&:blank?).uniq
    diverse = ensure_story_comment_diversity(profile: profile, suggestions: from_analysis)
    return diverse if diverse.present?

    generated = generate_google_engagement_comments!(
      payload: payload,
      image_description: analysis["image_description"],
      topics: Array(analysis["topics"]),
      author_type: analysis["author_type"].to_s
    )
    ensure_story_comment_diversity(profile: profile, suggestions: generated)
  end
  # Returns the comment-preparation summary for +profile+, memoised per
  # profile id on this instance.
  #
  # A missing profile yields a "profile_missing" summary. A non-Hash service
  # result is cached as {}. When a StandardError is raised, a
  # "profile_preparation_error" summary is returned and nothing is cached.
  def ensure_profile_comment_generation_readiness(profile:)
    unless profile
      return { ready_for_comment_generation: false, reason_code: "profile_missing", reason: "Profile missing." }
    end

    cache = (@profile_comment_preparation_cache ||= {})
    remembered = cache[profile.id]
    return remembered if remembered.is_a?(Hash)

    service = Ai::ProfileCommentPreparationService.new(
      account: @account,
      profile: profile,
      posts_limit: 10,
      comments_limit: 12
    )
    summary = service.prepare!
    cache[profile.id] = summary.is_a?(Hash) ? summary : {}
  rescue StandardError => e
    {
      ready_for_comment_generation: false,
      reason_code: "profile_preparation_error",
      reason: e.message.to_s,
      error_class: e.class.name
    }
  end
  # Summarises the profile's recent engagement history.
  #
  # Returns a Hash with:
  #   :prior_story_items - up to 12 latest story events (analysed, replied or
  #                        commented), each reduced to kind/story id/description/comment
  #   :prior_post_items  - up to 8 latest posts with shortcode, description and
  #                        at most 5 topics
  # Keys whose value is nil are dropped from each item.
  def recent_story_and_post_history(profile:)
    story_kinds = [ "story_analyzed", "story_reply_sent", "story_comment_posted_via_feed" ]
    recent_events = profile.instagram_profile_events
      .where(kind: story_kinds)
      .order(detected_at: :desc, id: :desc)
      .limit(12)

    prior_stories = recent_events.map do |event|
      meta = event.metadata.is_a?(Hash) ? event.metadata : {}
      summary = {
        kind: event.kind,
        story_id: meta["story_id"].to_s.presence,
        image_description: meta["ai_image_description"].to_s.presence,
        sent_comment: meta["ai_reply_text"].to_s.presence || meta["comment_text"].to_s.presence
      }
      summary.compact
    end

    prior_posts = profile.instagram_profile_posts.recent_first.limit(8).map do |post|
      analysis = post.analysis.is_a?(Hash) ? post.analysis : {}
      summary = {
        shortcode: post.shortcode,
        image_description: analysis["image_description"].to_s.presence,
        topics: Array(analysis["topics"]).first(5)
      }
      summary.compact
    end

    { prior_story_items: prior_stories, prior_post_items: prior_posts }
  end
  # Orders and filters comment suggestions so they differ from comments
  # previously sent to this profile.
  #
  # Suggestions are stripped, de-blanked and de-duplicated, then ranked by their
  # highest similarity to any of the last 40 sent comments (least similar
  # first). Only suggestions scoring below 0.72 against every past comment are
  # kept; when none qualifies, the full ranked list is returned.
  def ensure_story_comment_diversity(profile:, suggestions:)
    candidates = Array(suggestions).map { |item| item.to_s.strip }.reject(&:blank?).uniq
    return [] if candidates.empty?

    past_comments = profile.instagram_profile_events
      .where(kind: [ "story_reply_sent", "story_comment_posted_via_feed" ])
      .order(detected_at: :desc, id: :desc)
      .limit(40)
      .map do |event|
        meta = event.metadata.is_a?(Hash) ? event.metadata : {}
        meta["ai_reply_text"].to_s.presence || meta["comment_text"].to_s.presence
      end
      .compact
    return candidates if past_comments.empty?

    closest_match = lambda do |candidate|
      past_comments.map { |past| text_similarity_score(candidate, past) }.max.to_f
    end
    ranked = candidates.sort_by(&closest_match)

    novel = ranked.reject do |candidate|
      past_comments.any? { |past| text_similarity_score(candidate, past) >= 0.72 }
    end
    novel.present? ? novel : ranked
  end
  # Checks the profile's latest 250 "story_reply_sent" events for a reply to
  # the given story.
  #
  # Each event is compared, in this order, by story id (metadata or external
  # id), story reference, normalised story permalink and normalised media key;
  # blank inputs are never used for matching.
  #
  # Returns { found:, matched_by:, matched_external_id: } describing the first
  # match, or found: false with nil details.
  def story_already_replied?(profile:, story_id:, story_ref:, story_url:, media_url:)
    wanted_id = story_id.to_s.strip
    wanted_ref = story_ref.to_s.strip
    wanted_url = normalize_story_permalink(story_url)
    wanted_media = normalize_story_media_key(media_url)

    reply_events = profile.instagram_profile_events
      .where(kind: "story_reply_sent")
      .order(detected_at: :desc, id: :desc)
      .limit(250)

    reply_events.each do |event|
      meta = event.metadata.is_a?(Hash) ? event.metadata : {}
      seen_id = meta["story_id"].to_s.strip
      seen_ref = meta["story_ref"].to_s.strip
      seen_url = normalize_story_permalink(meta["story_url"])
      seen_media = normalize_story_media_key(meta["media_url"])

      matched_by =
        if wanted_id.present? && (seen_id == wanted_id || event.external_id.to_s == "story_reply_sent:#{wanted_id}")
          "story_id"
        elsif wanted_ref.present? && seen_ref.present? && seen_ref == wanted_ref
          "story_ref"
        elsif wanted_url.present? && seen_url.present? && seen_url == wanted_url
          "story_url"
        elsif wanted_media.present? && seen_media.present? && seen_media == wanted_media
          "media_url"
        end
      next unless matched_by

      return { found: true, matched_by: matched_by, matched_external_id: event.external_id.to_s }
    end

    { found: false, matched_by: nil, matched_external_id: nil }
  end
  # Reduces a story URL to its path without trailing slashes, e.g.
  # "https://www.instagram.com/stories/alice/123/?x=1" -> "/stories/alice/123".
  #
  # Returns "" for blank input and for anything whose path does not contain
  # "/stories/". When the URL cannot be parsed, the scheme/host prefix and the
  # query/fragment are stripped by hand so the result is still comparable with
  # the normalised form of a well-formed URL for the same story.
  def normalize_story_permalink(url)
    value = url.to_s.strip
    return "" if value.empty?

    path =
      begin
        URI.parse(value).path.to_s
      rescue StandardError
        # Unparseable (e.g. unescaped spaces): keep only the path portion.
        value.sub(%r{\A[a-z][a-z0-9+.-]*://[^/?#]*}i, "").sub(/[?#].*\z/m, "")
      end
    return "" unless path.include?("/stories/")

    path.sub(%r{/+\z}, "")
  end
  # Reduces a media URL to "<host><path>", dropping scheme, query and fragment
  # so that only the stable part of the URL is compared. The host is
  # lower-cased.
  #
  # Returns "" for blank input and for parseable URLs lacking a host or path.
  # When the URL cannot be parsed, the scheme and query/fragment are stripped by
  # hand instead of returning the raw string.
  def normalize_story_media_key(url)
    value = url.to_s.strip
    return "" if value.empty?

    begin
      uri = URI.parse(value)
      host = uri.host.to_s.downcase
      path = uri.path.to_s
      return "" if host.empty? || path.empty?

      "#{host}#{path}"
    rescue StandardError
      # Unparseable (e.g. unescaped spaces): approximate host + path manually.
      bare = value.sub(%r{\A[a-z][a-z0-9+.-]*://}i, "").sub(/[?#].*\z/m, "")
      host, slash, path = bare.partition("/")
      "#{host.downcase}#{slash}#{path}"
    end
  end
  # Scores how similar two texts are, from 0.0 (no shared words) to 1.0.
  #
  # Both texts are lower-cased and split into unique alphanumeric tokens; the
  # score is the number of shared tokens divided by the larger token count.
  # Tokenising uses the Unicode-aware [[:alnum:]] class so non-Latin comments
  # are compared too (an ASCII-only pattern scored them as 0.0). Invalid bytes
  # are scrubbed first. Returns 0.0 when either text has no tokens.
  def text_similarity_score(a, b)
    tokenize = ->(text) { text.to_s.scrub("").downcase.scan(/[[:alnum:]]+/).uniq }
    left = tokenize.call(a)
    right = tokenize.call(b)
    return 0.0 if left.empty? || right.empty?

    (left & right).length.to_f / [left.length, right.length].max
  end
  # Posts +comment_text+ on the post identified by +shortcode+ through the web UI.
  #
  # Opens the post page, waits for the comment box, types the text and clicks
  # the post button, recording snapshots after opening and after submitting.
  #
  # Returns the result of click_comment_post_button, or false when no comment
  # box appears or a StandardError is raised.
  def comment_on_post_via_ui!(driver:, shortcode:, comment_text:)
    post_url = "#{INSTAGRAM_BASE_URL}/p/#{shortcode}/"
    driver.navigate.to(post_url)
    wait_for(driver, css: "body", timeout: 12)
    dismiss_common_overlays!(driver)
    capture_task_html(driver: driver, task_name: "auto_engage_post_opened", status: "ok", meta: { shortcode: shortcode })

    comment_box = wait_for_comment_textbox(driver: driver)
    return false unless comment_box

    focus_and_type(driver: driver, field: comment_box, text: comment_text)
    posted = click_comment_post_button(driver: driver)
    sleep(0.6)

    outcome = posted ? "ok" : "error"
    capture_task_html(
      driver: driver,
      task_name: "auto_engage_post_comment_submit",
      status: outcome,
      meta: { shortcode: shortcode, posted: posted }
    )
    posted
  rescue StandardError
    false
  end
  # Replies to the story currently open in the viewer through the web UI.
  #
  # Waits up to 12 seconds for the reply box. Without one, the reason reported
  # by detect_story_reply_availability is returned. Otherwise the text is typed
  # and submitted with the post button, falling back to the Enter key.
  #
  # Returns a Hash with :posted and :reason (plus :marker_text when no reply box
  # was found). A StandardError is reported as reason "exception:<ClassName>".
  def comment_on_story_via_ui!(driver:, comment_text:)
    reply_box = wait_for_comment_textbox(driver: driver, timeout: 12)
    unless reply_box
      availability = detect_story_reply_availability(driver)
      return {
        posted: false,
        reason: availability[:reason],
        marker_text: availability[:marker_text]
      }
    end

    capture_task_html(driver: driver, task_name: "auto_engage_story_reply_box_ready", status: "ok")
    focus_and_type(driver: driver, field: reply_box, text: comment_text)

    return { posted: true, reason: "post_button_clicked" } if click_comment_post_button(driver: driver)
    return { posted: true, reason: "submitted_with_enter" } if send_enter_comment(driver: driver, field: reply_box)

    { posted: false, reason: "submit_controls_not_found" }
  rescue StandardError => e
    { posted: false, reason: "exception:#{e.class.name}" }
  end
  # API-first story reply path discovered from captured network traces:
  # 1) POST /api/v1/direct_v2/create_group_thread/ with recipient_users=["<reel_user_id>"]
  # 2) POST /api/v1/direct_v2/threads/broadcast/reel_share/ with media_id="<story_id>_<reel_user_id>", reel_id, thread_id, text
  #
  # Sends +comment_text+ as a reply to the story +story_id+ of +story_username+.
  #
  # The story id is normalized with normalize_story_id_token (the same helper
  # canonical_story_url uses), so query strings, fragments, trailing path
  # segments and "sig"/"unknown" placeholders never leak digits into the
  # media_id or the Referer.
  #
  # Returns a Hash that always contains :posted, :method ("api") and :reason.
  # On success it also carries :api_status, :api_thread_id and :api_item_id;
  # on an API-level failure it carries :api_status, :api_http_status and
  # :api_error_code. Any StandardError becomes reason "api_exception:<ClassName>".
  def comment_on_story_via_api!(story_id:, story_username:, comment_text:)
    failure = ->(reason) { { posted: false, method: "api", reason: reason } }

    text = comment_text.to_s.strip
    return failure.call("blank_comment_text") if text.blank?

    sid = normalize_story_id_token(story_id)
    return failure.call("missing_story_id") if sid.blank?

    username = normalize_username(story_username)
    return failure.call("missing_story_username") if username.blank?

    user_id = story_user_id_for(username: username)
    return failure.call("missing_story_user_id") if user_id.blank?

    thread_id = direct_thread_id_for_user(user_id: user_id)
    return failure.call("missing_thread_id") if thread_id.blank?

    payload = {
      action: "send_item",
      client_context: story_api_client_context,
      media_id: "#{sid}_#{user_id}",
      reel_id: user_id,
      text: text,
      thread_id: thread_id
    }
    body = ig_api_post_form_json(
      path: "/api/v1/direct_v2/threads/broadcast/reel_share/",
      referer: "#{INSTAGRAM_BASE_URL}/stories/#{username}/#{sid}/",
      form: payload
    )
    return failure.call("empty_api_response") unless body.is_a?(Hash)

    status = body["status"].to_s
    if status == "ok"
      return {
        posted: true,
        method: "api",
        reason: "reel_share_sent",
        api_status: status,
        api_thread_id: body.dig("payload", "thread_id").to_s.presence,
        api_item_id: body.dig("payload", "item_id").to_s.presence
      }
    end

    # Prefer the most specific error text the response offers.
    failure_reason =
      body["message"].to_s.presence ||
      body.dig("payload", "message").to_s.presence ||
      body["error_type"].to_s.presence ||
      "api_status_#{status.presence || 'unknown'}"
    {
      posted: false,
      method: "api",
      reason: failure_reason,
      api_status: status.presence || "unknown",
      api_http_status: body["_http_status"],
      api_error_code: body.dig("payload", "error_code").to_s.presence || body["error_code"].to_s.presence
    }
  rescue StandardError => e
    { posted: false, method: "api", reason: "api_exception:#{e.class.name}" }
  end
  # Resolves the numeric user id for +username+ via fetch_web_profile_info,
  # memoizing successful lookups in @story_user_id_cache.
  #
  # Returns the id as a String, or "" when the username is blank, the lookup
  # yields no id, or a StandardError is raised.
  def story_user_id_for(username:)
    @story_user_id_cache ||= {}
    key = normalize_username(username)
    return "" if key.blank?

    hit = @story_user_id_cache[key].to_s
    return hit if hit.present?

    profile = fetch_web_profile_info(key)
    user_node = profile.is_a?(Hash) ? profile.dig("data", "user") : nil
    resolved = user_node.is_a?(Hash) ? user_node["id"].to_s.strip : ""
    # Only successful lookups are cached, so failures are retried next time.
    @story_user_id_cache[key] = resolved if resolved.present?
    resolved
  rescue StandardError
    ""
  end
  # Convenience wrapper around create_direct_thread_for_user (cache enabled)
  # that returns only the thread id as a String, or "" on any StandardError.
  def direct_thread_id_for_user(user_id:)
    outcome = create_direct_thread_for_user(user_id: user_id, use_cache: true)
    outcome[:thread_id].to_s
  rescue StandardError
    ""
  end
  # Obtains a direct-message thread id for +user_id+ by POSTing to
  # /api/v1/direct_v2/create_group_thread/, optionally reusing a thread id
  # memoized in @story_reply_thread_cache.
  #
  # Returns a Hash with :thread_id ("" on failure) and :reason. Responses that
  # were parsed also contribute :api_status and :api_http_status, and failed
  # ones :api_error_code. A StandardError yields reason
  # "api_exception:<ClassName>".
  def create_direct_thread_for_user(user_id:, use_cache: true)
    @story_reply_thread_cache ||= {}
    recipient = user_id.to_s.strip
    return { thread_id: "", reason: "blank_user_id" } if recipient.blank?

    remembered = use_cache ? @story_reply_thread_cache[recipient].to_s : ""
    return { thread_id: remembered, reason: "cache_hit" } if remembered.present?

    response = ig_api_post_form_json(
      path: "/api/v1/direct_v2/create_group_thread/",
      referer: "#{INSTAGRAM_BASE_URL}/direct/new/",
      form: { recipient_users: [ recipient ].to_json }
    )
    return { thread_id: "", reason: "empty_api_response" } unless response.is_a?(Hash)

    # The id is looked up in three places, top-level key first.
    new_thread_id =
      response["thread_id"].to_s.presence ||
      response.dig("thread", "thread_id").to_s.presence ||
      response.dig("thread", "id").to_s.presence

    if new_thread_id.blank?
      return {
        thread_id: "",
        reason: response["message"].to_s.presence || response["error_type"].to_s.presence || "missing_thread_id",
        api_status: response["status"].to_s.presence || "unknown",
        api_http_status: response["_http_status"],
        api_error_code: response["error_code"].to_s.presence || response.dig("payload", "error_code").to_s.presence
      }
    end

    @story_reply_thread_cache[recipient] = new_thread_id
    {
      thread_id: new_thread_id,
      reason: "thread_created",
      api_status: response["status"].to_s.presence || "ok",
      api_http_status: response["_http_status"]
    }
  rescue StandardError => e
    { thread_id: "", reason: "api_exception:#{e.class.name}" }
  end
  # Builds the client_context token sent with story replies: the current time
  # in milliseconds followed by a random 7-digit number.
  def story_api_client_context
    millis = (Time.current.to_f * 1000).to_i
    suffix = rand(1_000_000..9_999_999)
    "#{millis}#{suffix}"
  end
  # POSTs +form+ (values stringified) as a URL-encoded body to +path+, using
  # the user agent, app id and cookies stored on @account.
  #
  # +path+ is used as-is when it starts with "http"; otherwise it is appended
  # to INSTAGRAM_BASE_URL.
  #
  # Returns the parsed JSON body with the numeric HTTP status added under
  # "_http_status", or nil when the response is not JSON or any StandardError
  # is raised.
  def ig_api_post_form_json(path:, referer:, form:)
    raw_path = path.to_s
    target = URI.parse(raw_path.start_with?("http") ? raw_path : "#{INSTAGRAM_BASE_URL}#{path}")

    client = Net::HTTP.new(target.host, target.port)
    client.use_ssl = (target.scheme == "https")
    client.open_timeout = 10
    client.read_timeout = 20

    request = Net::HTTP::Post.new(target.request_uri)
    {
      "User-Agent" => @account.user_agent.presence || "Mozilla/5.0",
      "Accept" => "application/json, text/plain, */*",
      "Content-Type" => "application/x-www-form-urlencoded; charset=UTF-8",
      "X-Requested-With" => "XMLHttpRequest",
      "X-IG-App-ID" => (@account.auth_snapshot.dig("ig_app_id").presence || "936619743392459"),
      "Referer" => referer.to_s
    }.each { |header, value| request[header] = value }

    # The CSRF header is only sent when the csrftoken cookie has a value.
    csrf_token = @account.cookies.find { |cookie| cookie["name"].to_s == "csrftoken" }&.dig("value").to_s
    request["X-CSRFToken"] = csrf_token if csrf_token.present?
    request["Cookie"] = cookie_header_for(@account.cookies)
    request.set_form_data(form.transform_values(&:to_s))

    response = client.request(request)
    return nil unless response["content-type"].to_s.include?("json")

    parsed = JSON.parse(response.body.to_s)
    parsed["_http_status"] = response.code.to_i
    parsed
  rescue StandardError
    nil
  end
  # Runs a script in the page that scans short visible leaf-node texts for
  # phrases explaining why a story cannot be replied to.
  #
  # Returns a Hash with :reason ("replies_not_allowed", "reply_unavailable" or
  # the default "reply_box_not_found") and :marker_text (the matched phrase,
  # "" when nothing matched). Any StandardError yields the default Hash.
  def detect_story_reply_availability(driver)
    script = <<~JS
      const out = { reason: "reply_box_not_found", marker_text: "" };
      const norm = (value) => (value || "").toString().replace(/\\s+/g, " ").trim().toLowerCase();
      const texts = Array.from(document.querySelectorAll("body *"))
        .filter((el) => {
          if (!el) return false;
          if (el.children && el.children.length > 0) return false;
          const r = el.getBoundingClientRect();
          return r.width > 3 && r.height > 3;
        })
        .map((el) => norm(el.innerText || el.textContent))
        .filter((t) => t.length > 0 && t.length < 140);
      const joined = texts.join(" | ");
      const matchAny = (patterns) => patterns.find((p) => joined.includes(p));
      const repliesNotAllowed = matchAny([
        "replies aren't available",
        "replies are turned off",
        "replies are off",
        "can't reply to this story",
        "you can't reply to this story",
        "reply unavailable"
      ]);
      if (repliesNotAllowed) {
        out.reason = "replies_not_allowed";
        out.marker_text = repliesNotAllowed;
        return out;
      }
      const unavailable = matchAny([
        "story unavailable",
        "this story is unavailable",
        "content unavailable",
        "not available right now",
        "unavailable"
      ]);
      if (unavailable) {
        out.reason = "reply_unavailable";
        out.marker_text = unavailable;
        return out;
      }
      return out;
    JS

    found = driver.execute_script(script)
    return { reason: "reply_box_not_found", marker_text: "" } unless found.is_a?(Hash)

    {
      reason: found["reason"].to_s.presence || "reply_box_not_found",
      marker_text: found["marker_text"].to_s
    }
  rescue StandardError
    { reason: "reply_box_not_found", marker_text: "" }
  end
  # Maps a story-reply failure reason onto a { reason_code:, status: } pair.
  #
  # The reason is taken from the +reason:+ keyword when it is present;
  # otherwise from +comment_result[:reason]+. A blank +reason:+ (nil, false,
  # "" or whitespace) now falls back to +comment_result+; previously an empty
  # String was truthy enough to skip the fallback and always produced
  # "comment_submit_failed".
  #
  # Unrecognized reasons map to reason_code "comment_submit_failed" with
  # status "Unavailable".
  def story_reply_skip_status_for(comment_result = nil, reason: nil)
    code = reason.presence&.to_s || comment_result.to_h[:reason].to_s

    status_by_code = {
      "api_can_reply_false" => "Replies not allowed (API)",
      "reply_box_not_found" => "Reply box not found",
      "replies_not_allowed" => "Replies not allowed",
      "reply_unavailable" => "Unavailable",
      "reply_precheck_error" => "Unavailable"
    }
    return { reason_code: code, status: status_by_code[code] } if status_by_code.key?(code)

    { reason_code: "comment_submit_failed", status: "Unavailable" }
  end
  # Reports whether the story can be replied to, based on the :can_reply value
  # of the item returned by resolve_story_item_via_api.
  #
  # Returns a Hash with :known, :reply_possible, :reason_code and :status.
  # :known is false when the item is not a Hash, when :can_reply is nil, or
  # when a StandardError is raised.
  def story_reply_capability_from_api(username:, story_id:)
    undetermined = ->(code) { { known: false, reply_possible: nil, reason_code: code, status: "Unknown" } }

    story = resolve_story_item_via_api(username: username, story_id: story_id)
    return undetermined.call("api_story_not_found") unless story.is_a?(Hash)

    flag = story[:can_reply]
    return undetermined.call("api_can_reply_missing") if flag.nil?
    return { known: true, reply_possible: true, reason_code: nil, status: "Reply available (API)" } if flag

    { known: true, reply_possible: false, reason_code: "api_can_reply_false", status: "Replies not allowed (API)" }
  rescue StandardError
    { known: false, reply_possible: nil, reason_code: "api_capability_error", status: "Unknown" }
  end
  # Summarizes the external-profile indicator fields of the story item
  # returned by resolve_story_item_via_api.
  #
  # Returns a Hash with :known, :has_external_profile_link, :reason_code,
  # :linked_username, :linked_profile_url, :marker_text and :linked_targets.
  # :linked_username and :linked_profile_url are always "" on this path.
  # :known is false when the item is not a Hash or a StandardError is raised.
  def story_external_profile_link_context_from_api(username:, story_id:, cache: nil)
    template = {
      known: false,
      has_external_profile_link: false,
      reason_code: nil,
      linked_username: "",
      linked_profile_url: "",
      marker_text: "",
      linked_targets: []
    }

    story = resolve_story_item_via_api(username: username, story_id: story_id, cache: cache)
    return template.merge(reason_code: "api_story_not_found") unless story.is_a?(Hash)

    flagged = ActiveModel::Type::Boolean.new.cast(story[:api_has_external_profile_indicator])
    return template.merge(known: true) unless flagged

    why = story[:api_external_profile_reason].to_s.presence || "api_external_profile_indicator"
    # Targets are stringified, trimmed, de-blanked and de-duplicated.
    targets = Array(story[:api_external_profile_targets]).map { |target| target.to_s.strip }.reject(&:blank?).uniq
    template.merge(
      known: true,
      has_external_profile_link: true,
      reason_code: why,
      marker_text: why,
      linked_targets: targets
    )
  rescue StandardError
    { known: false, has_external_profile_link: false, reason_code: "api_external_context_error", linked_username: "", linked_profile_url: "", marker_text: "", linked_targets: [] }
  end
  # Quick UI pre-check (2 second wait) for whether the open story offers a
  # reply box.
  #
  # Returns a Hash with :reply_possible, :reason_code, :status, :marker_text
  # and :submission_reason. When the box is missing, the details come from
  # detect_story_reply_availability and story_reply_skip_status_for. A
  # StandardError is reported as reason_code "reply_precheck_error".
  def check_story_reply_capability(driver:)
    if wait_for_comment_textbox(driver: driver, timeout: 2)
      return { reply_possible: true, reason_code: nil, status: "Reply available", marker_text: "", submission_reason: "reply_box_found" }
    end

    probe = detect_story_reply_availability(driver)
    skip = story_reply_skip_status_for(reason: probe[:reason])
    {
      reply_possible: false,
      reason_code: skip[:reason_code],
      status: skip[:status],
      marker_text: probe[:marker_text].to_s,
      submission_reason: probe[:reason].to_s
    }
  rescue StandardError => e
    {
      reply_possible: false,
      reason_code: "reply_precheck_error",
      status: "Unavailable",
      marker_text: "",
      submission_reason: "exception:#{e.class.name}"
    }
  end
  # Looks for a reaction-like button in the lower part of the viewport
  # (top edge at or below 45% of the window height), scores candidates by
  # their text / aria-label / title, and clicks the best-scoring one.
  #
  # Returns a Hash with :reacted, :reason and :marker_text. A non-Hash script
  # result yields reason "reaction_detection_error"; a StandardError yields
  # "reaction_exception:<ClassName>".
  #
  # NOTE(review): the emoji character class in the script has no `u` flag, so
  # emoji outside the BMP are probably matched per UTF-16 surrogate half rather
  # than as whole characters - confirm whether that breadth is intended.
  def react_to_story_if_available!(driver:)
    script = <<~JS
      const out = { reacted: false, reason: "reaction_controls_not_found", marker_text: "" };
      const norm = (value) => (value || "").toString().replace(/\\s+/g, " ").trim().toLowerCase();
      const isVisible = (el) => {
        if (!el) return false;
        const s = window.getComputedStyle(el);
        if (!s || s.display === "none" || s.visibility === "hidden" || s.opacity === "0") return false;
        const r = el.getBoundingClientRect();
        if (r.width < 4 || r.height < 4) return false;
        return r.bottom > 0 && r.top < window.innerHeight;
      };
      const candidates = Array.from(document.querySelectorAll("button, [role='button']"))
        .filter((el) => {
          if (!isVisible(el)) return false;
          const r = el.getBoundingClientRect();
          return r.top >= Math.max(0, window.innerHeight * 0.45);
        });
      const scoreFor = (el) => {
        const text = norm(el.innerText || el.textContent);
        const aria = norm(el.getAttribute && el.getAttribute("aria-label"));
        const title = norm(el.getAttribute && el.getAttribute("title"));
        const all = `${text} | ${aria} | ${title}`;
        if (all.includes("quick reaction")) return 100;
        if (all.includes("reaction")) return 95;
        if (all.includes("react")) return 90;
        if (all.includes("like")) return 75;
        if (all.includes("heart")) return 70;
        if (/[❤️❤🔥😍😂👏😢😮]/.test(text)) return 60;
        return 0;
      };
      const sorted = candidates
        .map((el) => ({ el, score: scoreFor(el) }))
        .filter((entry) => entry.score > 0)
        .sort((a, b) => b.score - a.score);
      const chosen = sorted[0];
      if (!chosen || !chosen.el) return out;
      const marker = norm(chosen.el.innerText || chosen.el.textContent) || norm(chosen.el.getAttribute && chosen.el.getAttribute("aria-label")) || "reaction_button";
      try {
        chosen.el.click();
        out.reacted = true;
        out.reason = "reaction_button_clicked";
        out.marker_text = marker;
        return out;
      } catch (e) {
        out.reason = "reaction_click_failed";
        out.marker_text = marker;
        return out;
      }
    JS

    outcome = driver.execute_script(script)
    return { reacted: false, reason: "reaction_detection_error", marker_text: "" } unless outcome.is_a?(Hash)

    {
      reacted: ActiveModel::Type::Boolean.new.cast(outcome["reacted"]),
      reason: outcome["reason"].to_s.presence || "reaction_controls_not_found",
      marker_text: outcome["marker_text"].to_s
    }
  rescue StandardError => e
    { reacted: false, reason: "reaction_exception:#{e.class.name}", marker_text: "" }
  end
  # True when +profile+ is in DM state "unavailable" and its
  # dm_interaction_retry_after_at timestamp is still in the future.
  def dm_interaction_retry_pending?(profile)
    return false unless profile && profile.dm_interaction_state.to_s == "unavailable"

    deadline = profile.dm_interaction_retry_after_at
    deadline.present? && deadline > Time.current
  end
  # Persists the DM interaction state on +profile+.
  #
  # can_message is derived from +state+: true for "messageable", nil for
  # "unknown", false for anything else. restriction_reason is cleared only
  # when can_message is true.
  #
  # Returns nil when +profile+ is missing or when the update raises a
  # StandardError (the error is swallowed).
  def mark_profile_dm_state!(profile:, state:, reason:, retry_after_at: nil)
    return unless profile

    state_name = state.to_s
    reason_text = reason.to_s.presence
    messageable = { "messageable" => true, "unknown" => nil }.fetch(state_name, false)

    attrs = {
      can_message: messageable,
      restriction_reason: messageable == true ? nil : reason_text,
      dm_interaction_state: state_name.presence,
      dm_interaction_reason: reason_text,
      dm_interaction_checked_at: Time.current,
      dm_interaction_retry_after_at: retry_after_at
    }
    profile.update!(attrs)
  rescue StandardError
    nil
  end
  # Marks +profile+ as DM-"unavailable" using the :reason of +result+
  # ("send_failed" when blank) and schedules the next retry:
  # STORY_INTERACTION_RETRY_DAYS days out for an HTTP 403, 12 hours otherwise.
  #
  # Does nothing when +profile+ is missing or +result+ is not a Hash.
  def apply_dm_state_from_send_result(profile:, result:)
    return unless profile
    return unless result.is_a?(Hash)

    failure_reason = result[:reason].to_s.presence || "send_failed"
    forbidden = result[:api_http_status].to_i == 403
    next_attempt = forbidden ? Time.current + STORY_INTERACTION_RETRY_DAYS.days : Time.current + 12.hours

    mark_profile_dm_state!(
      profile: profile,
      state: "unavailable",
      reason: failure_reason,
      retry_after_at: next_attempt
    )
  end
  # True when +profile+ is in story-interaction state "unavailable" and its
  # story_interaction_retry_after_at timestamp is still in the future.
  def profile_interaction_retry_pending?(profile)
    return false unless profile && profile.story_interaction_state.to_s == "unavailable"

    deadline = profile.story_interaction_retry_after_at
    deadline.present? && deadline > Time.current
  end
  # Persists the story interaction state on +profile+.
  #
  # When +reaction_available+ is nil the stored story_reaction_available value
  # is kept; otherwise it is cast to a boolean.
  #
  # Returns nil when +profile+ is missing or when the update raises a
  # StandardError (the error is swallowed).
  def mark_profile_interaction_state!(profile:, state:, reason:, reaction_available:, retry_after_at: nil)
    return unless profile

    reaction_flag =
      if reaction_available.nil?
        profile.story_reaction_available
      else
        ActiveModel::Type::Boolean.new.cast(reaction_available)
      end

    profile.update!(
      story_interaction_state: state.to_s.presence,
      story_interaction_reason: reason.to_s.presence,
      story_interaction_checked_at: Time.current,
      story_interaction_retry_after_at: retry_after_at,
      story_reaction_available: reaction_flag
    )
  rescue StandardError
    nil
  end
  # Stores +comment_text+ under the "reply_comment" key of the event's
  # metadata (working on a deep copy, starting from {} when the metadata is
  # not a Hash). Does nothing when either argument is blank.
  def attach_reply_comment_to_downloaded_event!(downloaded_event:, comment_text:)
    return if downloaded_event.blank? || comment_text.blank?

    current = downloaded_event.metadata
    updated = current.is_a?(Hash) ? current.deep_dup : {}
    updated["reply_comment"] = comment_text.to_s
    downloaded_event.update!(metadata: updated)
  end
  # Polls for up to +timeout+ seconds until a displayed comment/reply input is
  # on the page: a matching <textarea> is preferred, a contenteditable
  # textbox <div> is the fallback.
  #
  # Returns the element, or nil when the wait times out.
  def wait_for_comment_textbox(driver:, timeout: 10)
    textarea_css = "textarea[aria-label*='comment'], textarea[aria-label*='Comment'], textarea[placeholder*='comment'], textarea[placeholder*='Comment'], textarea[placeholder*='reply'], textarea[placeholder*='Reply']"
    editable_css = "div[role='textbox'][contenteditable='true']"

    # Elements that raise while being inspected are treated as not displayed.
    shown = lambda do |node|
      begin
        node.displayed?
      rescue StandardError
        false
      end
    end

    Selenium::WebDriver::Wait.new(timeout: timeout).until do
      driver.find_elements(css: textarea_css).find(&shown) ||
        driver.find_elements(css: editable_css).find(&shown)
    end
  rescue Selenium::WebDriver::Error::TimeoutError
    nil
  end
  # Scrolls +field+ into view, clicks it (both best-effort) and types +text+.
  # A <div> field is focused via script before typing; any other field has its
  # content selected with Ctrl+A and deleted first.
  def focus_and_type(driver:, field:, text:)
    preparations = [
      -> { driver.execute_script("arguments[0].scrollIntoView({block:'center'});", field) },
      -> { field.click }
    ]
    preparations.each do |step|
      begin
        step.call
      rescue StandardError
        nil
      end
    end

    if field.tag_name.to_s.downcase == "div"
      driver.execute_script("arguments[0].focus();", field)
    else
      field.send_keys([:control, "a"])
      field.send_keys(:backspace)
    end
    field.send_keys(text.to_s)
  end
  # Finds the first enabled button (or div with role=button) labelled exactly
  # "Post", then exactly "Reply", then containing "post", then containing
  # "reply" (case-insensitive), and clicks it. A failed native click falls
  # back to js_click.
  #
  # Returns true after a click attempt, false when no button matched or a
  # StandardError was raised.
  def click_comment_post_button(driver:)
    clickable = "//*[self::button or (self::div and @role='button')]"
    lowered = "translate(normalize-space(.), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz')"
    predicates = [
      "[normalize-space()='Post']",
      "[normalize-space()='Reply']",
      "[contains(#{lowered}, 'post')]",
      "[contains(#{lowered}, 'reply')]"
    ]

    target = nil
    predicates.each do |predicate|
      target = driver.find_elements(xpath: "#{clickable}#{predicate}").find { |el| element_enabled?(el) }
      break if target
    end
    return false unless target

    begin
      driver.action.move_to(target).click.perform
    rescue StandardError
      js_click(driver, target)
    end
    true
  rescue StandardError
    false
  end
  # Clicks +field+ and presses Enter through the driver's action builder.
  # Returns true when the action ran without raising, false otherwise.
  def send_enter_comment(driver:, field:)
    driver.action.click(field).send_keys(:enter).perform
    true
  rescue StandardError
    false
  end
  # Runs a script that pauses every <video> (and sets its playbackRate to 0)
  # and pauses animations/transitions on elements that have
  # role="progressbar" or a class name containing "progress".
  #
  # Returns the script result, or nil when a StandardError is raised.
  def freeze_story_progress!(driver)
    script = <<~JS
      const pauseStory = () => {
        try {
          document.querySelectorAll("video").forEach((v) => {
            try { v.pause(); } catch (e) {}
            try { v.playbackRate = 0; } catch (e) {}
          });
        } catch (e) {}
        try {
          document.querySelectorAll("*").forEach((el) => {
            if (!el || !el.style) return;
            if (el.getAttribute("role") === "progressbar" || el.className.toString().toLowerCase().includes("progress")) {
              try { el.style.animationPlayState = "paused"; } catch (e) {}
              try { el.style.transitionDuration = "999999s"; } catch (e) {}
            }
          });
        } catch (e) {}
      };
      pauseStory();
    JS
    driver.execute_script(script)
  rescue StandardError
    nil
  end
  # Reduces a raw story id to its digits.
  #
  # Anything from the first "?" or "#" on is dropped, then only the part
  # before the first "/" is kept. The placeholders "unknown" and "sig"
  # (case-insensitive) and any token starting with "sig:" yield "".
  #
  # Returns the digit String, or "" when nothing usable remains or a
  # StandardError is raised.
  def normalize_story_id_token(value)
    raw = value.to_s.strip
    return "" if raw.blank?

    head = raw.split(/[?#]/).first.to_s.split("/").first.to_s
    return "" if head.blank?

    placeholder = head.casecmp("unknown").zero? || head.casecmp("sig").zero? || head.start_with?("sig:")
    return "" if placeholder

    head.gsub(/\D/, "").presence || ""
  rescue StandardError
    ""
  end
  # Builds "<base>/stories/<username>/<story_id>/" from normalized parts,
  # dropping the story segment when no id is available. Falls back to
  # +fallback_url+ (as a String) when the username is blank or a
  # StandardError is raised.
  def canonical_story_url(username:, story_id:, fallback_url:)
    handle = normalize_username(username)
    token = normalize_story_id_token(story_id)
    return fallback_url.to_s unless handle.present?

    profile_stories = "#{INSTAGRAM_BASE_URL}/stories/#{handle}/"
    token.present? ? "#{profile_stories}#{token}/" : profile_stories
  rescue StandardError
    fallback_url.to_s
  end
  # Tries to recover a story id (a run of 8 or more digits) from a media URL.
  #
  # First source: the Base64-decoded "ig_cache_key" query parameter. Second
  # source: a "/stories/<username>/<digits>" segment anywhere in the URL.
  #
  # Returns the digits as a String, or "" when neither source matches or a
  # StandardError is raised.
  def story_id_hint_from_media_url(url)
    candidate = url.to_s.strip
    return "" if candidate.blank?

    # Failures while parsing or decoding only disable this first source.
    cache_hint =
      begin
        params = Rack::Utils.parse_query(URI.parse(candidate).query.to_s)
        cache_key = params["ig_cache_key"].to_s
        cache_key.present? ? Base64.decode64(CGI.unescape(cache_key)).to_s[/(\d{8,})/, 1] : nil
      rescue StandardError
        nil
      end
    return cache_hint.to_s if cache_hint

    path_hint = candidate[%r{/stories/[A-Za-z0-9._]{1,30}/(\d{8,})}, 1]
    path_hint ? path_hint.to_s : ""
  rescue StandardError
    ""
  end
  # Derives a "<username>:<story_id>" reference from a story URL, using the
  # first two path segments after the last "/stories/". The story part is
  # empty when the URL has no second segment.
  #
  # Returns "" when the URL has no "/stories/" part or no username segment.
  def current_story_reference(url)
    href = url.to_s
    return "" unless href.include?("/stories/")

    segments = href.split("/stories/").last.to_s.split("/")
    handle = segments[0].to_s
    return "" if handle.blank?

    "#{handle}:#{segments[1]}"
  end
  # Returns the first non-blank path segment of +url+ when it looks like a
  # username (1-30 characters of letters, digits, "." or "_") and is not
  # "stories" (case-insensitive); otherwise "".
  #
  # When the value cannot be parsed as a URI it is treated as a plain path.
  def extract_username_from_profile_like_path(url)
    raw = url.to_s
    return "" if raw.blank?

    path =
      begin
        URI.parse(raw).path.to_s
      rescue StandardError
        raw
      end

    head = path.split("/").reject(&:blank?).first.to_s
    return "" if head.blank?
    return "" if head.casecmp("stories").zero?

    head.match?(/\A[a-zA-Z0-9._]{1,30}\z/) ? head : ""
  end
  # Navigates back to the expected story when the browser is no longer
  # showing it.
  #
  # Does nothing when +expected_ref+ is blank or already equals the reference
  # derived from the current URL. Otherwise it loads the story URL built from
  # +username+ and the id part of +expected_ref+, dismisses overlays and
  # records an HTML capture.
  def ensure_story_same_or_reload!(driver:, expected_ref:, username:)
    wanted = expected_ref.to_s
    return if wanted.blank?
    return if current_story_reference(driver.current_url.to_s) == expected_ref

    wanted_id = wanted.split(":")[1].to_s
    story_path = wanted_id.present? ? "/stories/#{username}/#{wanted_id}/" : "/stories/#{username}/"
    driver.navigate.to("#{INSTAGRAM_BASE_URL}#{story_path}")
    wait_for(driver, css: "body", timeout: 12)
    dismiss_common_overlays!(driver)

    landed_ref = current_story_reference(driver.current_url.to_s)
    capture_task_html(
      driver: driver,
      task_name: "auto_engage_story_reloaded",
      status: "ok",
      meta: { expected_ref: expected_ref, current_ref: landed_ref }
    )
  end
  # Decides whether a downloaded story asset should be skipped as a
  # placeholder or blank image.
  #
  # Skip reasons: "empty_download", "too_small_bytes" (under 1500 bytes),
  # "tiny_dimensions" (both dimensions known and either under 120) and
  # "low_entropy_small_image".
  #
  # Returns a Hash with :skip, :reason and :entropy. A StandardError yields
  # { skip: false, reason: nil, entropy: nil }.
  def evaluate_story_image_quality(download:, media:)
    from_hash = download.is_a?(Hash)
    payload = from_hash ? download[:bytes].to_s.b : "".b
    mime = from_hash ? download[:content_type].to_s : ""
    width = media[:width].to_i
    height = media[:height].to_i
    rejected = ->(why, score = nil) { { skip: true, reason: why, entropy: score } }

    return rejected.call("empty_download") if payload.blank?
    return rejected.call("too_small_bytes") if payload.bytesize < 1500

    dimensions_known = width.positive? && height.positive?
    return rejected.call("tiny_dimensions") if dimensions_known && (width < 120 || height < 120)

    score = bytes_entropy(payload)
    # Heuristic: placeholder/blank assets are often very small and very low entropy.
    suspicious = mime.start_with?("image/") && payload.bytesize < 45_000 && score < 4.2
    return rejected.call("low_entropy_small_image", score) if suspicious

    { skip: false, reason: nil, entropy: score }
  rescue StandardError
    { skip: false, reason: nil, entropy: nil }
  end
  # Shannon entropy of the byte distribution of +bytes+, in bits per byte
  # (0.0 to 8.0), rounded to four decimals. Empty input yields 0.0.
  def bytes_entropy(bytes)
    raw = bytes.to_s.b
    return 0.0 if raw.empty?

    total = raw.bytesize.to_f
    histogram = raw.each_byte.with_object(Array.new(256, 0)) { |byte, bins| bins[byte] += 1 }

    # Byte values that never occur contribute nothing to the sum.
    bits = histogram.reject(&:zero?).inject(0.0) do |sum, occurrences|
      share = occurrences / total
      sum - share * Math.log2(share)
    end
    bits.round(4)
  end
  # Detects whether the currently open story looks like an ad.
  #
  # Signals, in order:
  # 1. DOM script, header pass: short labels in header / story-testid nodes
  #    that match an explicit marker such as "sponsored" or
  #    "paid partnership".
  # 2. DOM script, backup pass: any short visible label in the top story zone
  #    whose text, aria-label or title matches the marker regex.
  # 3. ad_hint_from_media_url on media[:url]; only a "high" confidence hint
  #    marks the story as an ad.
  #
  # The word boundaries in the marker regex are written as "\\b": inside this
  # interpolating heredoc a bare "\b" is Ruby's backspace escape, which made
  # the backup pass unable to match anything.
  #
  # Returns a Hash with :ad_detected, :reason, :marker_text, :signal_source,
  # :signal_confidence and :debug_hint. A non-Hash script result or a
  # StandardError yields the all-empty, ad_detected: false Hash.
  def detect_story_ad_context(driver:, media: nil)
    payload = driver.execute_script(<<~JS)
      const out = { ad_detected: false, reason: "", marker_text: "" };
      const explicitMarkers = [
        "sponsored",
        "sponsored post",
        "sponsored content",
        "promoted",
        "paid partnership",
        "advertisement"
      ];
      const norm = (value) => (value || "").toString().replace(/\\s+/g, " ").trim().toLowerCase();
      const isVisible = (el) => {
        if (!el) return false;
        const s = window.getComputedStyle(el);
        if (!s || s.display === "none" || s.visibility === "hidden" || s.opacity === "0") return false;
        const r = el.getBoundingClientRect();
        if (r.width < 4 || r.height < 4) return false;
        return r.bottom > 0 && r.top < window.innerHeight;
      };
      const inStoryHeaderZone = (el) => {
        const r = el.getBoundingClientRect();
        return r.top >= 0 && r.top <= Math.max(240, window.innerHeight * 0.38);
      };
      const matchesExplicitMarker = (text) => {
        if (!text) return "";
        for (const m of explicitMarkers) {
          if (text === m) return m;
          if (text.startsWith(`${m} `)) return m;
          if (text.endsWith(` ${m}`)) return m;
          if (text.includes(` ${m} `)) return m;
        }
        return "";
      };
      const markerRegex = /\\b(sponsored|promoted|paid partnership|advertisement)\\b/;
      const path = (window.location && window.location.pathname || "").toLowerCase();
      if (!path.includes("/stories/")) return out;
      // Keep the search focused on story header text nodes to avoid false positives from unrelated controls.
      const nodes = Array.from(document.querySelectorAll("header span, header a, header [role='button'], [data-testid*='story'] span, [data-testid*='story'] a"));
      for (const node of nodes) {
        if (!isVisible(node)) continue;
        if (!inStoryHeaderZone(node)) continue;
        const text = norm(node.innerText || node.textContent);
        const aria = norm(node.getAttribute && node.getAttribute("aria-label"));
        if (text.length > 60 && aria.length > 60) continue;
        const marker = matchesExplicitMarker(text) || matchesExplicitMarker(aria);
        if (!marker) continue;
        out.ad_detected = true;
        out.reason = "header_marker_match";
        out.marker_text = text || aria || marker;
        return out;
      }
      // Backup detector: scan concise visible labels in the top story zone.
      // This catches some sponsored labels that are not rendered inside <header>.
      const topNodes = Array.from(document.querySelectorAll("span, a, div, button")).filter((node) => {
        if (!isVisible(node)) return false;
        if (!inStoryHeaderZone(node)) return false;
        const text = norm(node.innerText || node.textContent);
        if (!text || text.length > 42) return false;
        return true;
      });
      for (const node of topNodes) {
        const text = norm(node.innerText || node.textContent);
        const aria = norm(node.getAttribute && node.getAttribute("aria-label"));
        const title = norm(node.getAttribute && node.getAttribute("title"));
        const candidate = [text, aria, title].find((value) => value && markerRegex.test(value));
        if (!candidate) continue;
        out.ad_detected = true;
        out.reason = "top_zone_marker_match";
        out.marker_text = candidate;
        return out;
      }
      return out;
    JS
    return { ad_detected: false, reason: "", marker_text: "", signal_source: "", signal_confidence: "", debug_hint: "" } unless payload.is_a?(Hash)

    result = {
      ad_detected: ActiveModel::Type::Boolean.new.cast(payload["ad_detected"]),
      reason: payload["reason"].to_s,
      marker_text: payload["marker_text"].to_s,
      signal_source: "dom_header",
      signal_confidence: "high",
      debug_hint: ""
    }
    return result if result[:ad_detected]

    # The DOM found nothing: consult the media URL as a secondary signal.
    media_url = media.is_a?(Hash) ? media[:url].to_s : ""
    media_hint = ad_hint_from_media_url(media_url)
    return result.merge(signal_source: "", signal_confidence: "", debug_hint: "") if media_hint.blank?

    if media_hint[:confidence] == "high"
      {
        ad_detected: true,
        reason: "media_url_ad_marker",
        marker_text: media_hint[:marker].to_s,
        signal_source: "media_url",
        signal_confidence: media_hint[:confidence].to_s,
        debug_hint: media_hint[:marker].to_s
      }
    else
      # Lower-confidence hints are surfaced for debugging only.
      {
        ad_detected: false,
        reason: "",
        marker_text: "",
        signal_source: "media_url",
        signal_confidence: media_hint[:confidence].to_s,
        debug_hint: media_hint[:marker].to_s
      }
    end
  rescue StandardError
    { ad_detected: false, reason: "", marker_text: "", signal_source: "", signal_confidence: "", debug_hint: "" }
  end
  # Looks for a visible link in the current page that points at another
  # Instagram profile than +current_username+.
  #
  # The browser-side script walks anchors and role=link elements, keeps only
  # visible ones whose href is a single-segment profile path on instagram.com
  # (or a subdomain of it), skips links to the current user and mention-style
  # links, and reports the first remaining match.
  #
  # @param driver [#execute_script] browser driver used to run the script
  # @param current_username [String] username of the story owner; normalized via normalize_username
  # @return [Hash] :has_external_profile_link, :linked_username, :linked_profile_url, :marker_text;
  #   the "nothing found" hash when no link matches, the payload is not a Hash, or any error is raised
  def detect_story_external_profile_link_context(driver:, current_username:)
    empty_context = { has_external_profile_link: false, linked_username: "", linked_profile_url: "", marker_text: "" }
    current = normalize_username(current_username).to_s

    payload = driver.execute_script(<<~JS, current)
      const currentUsername = (arguments[0] || "").toString().trim().toLowerCase();
      const out = { has_external_profile_link: false, linked_username: "", linked_profile_url: "", marker_text: "" };
      const norm = (value) => (value || "").toString().replace(/\\s+/g, " ").trim();
      const normLower = (value) => norm(value).toLowerCase();
      const isVisible = (el) => {
        if (!el) return false;
        const s = window.getComputedStyle(el);
        if (!s || s.display === "none" || s.visibility === "hidden" || s.opacity === "0") return false;
        const r = el.getBoundingClientRect();
        if (r.width < 8 || r.height < 8) return false;
        return r.bottom > 0 && r.top < window.innerHeight;
      };
      const parseLinkedUsername = (href) => {
        try {
          const u = new URL(href, window.location.origin);
          // Accept instagram.com and its subdomains only; an unanchored suffix
          // test would also match look-alike hosts such as "notinstagram.com".
          if (!/(^|\\.)instagram\\.com$/i.test(u.hostname)) return "";
          const segs = u.pathname.split("/").filter(Boolean);
          if (segs.length !== 1) return "";
          const candidate = (segs[0] || "").toLowerCase();
          if (!/^[a-z0-9._]{1,30}$/.test(candidate)) return "";
          return candidate;
        } catch (e) {
          return "";
        }
      };
      const candidates = Array.from(document.querySelectorAll("a[href], [role='link'][href], [role='link'][data-href]"));
      for (const el of candidates) {
        if (!isVisible(el)) continue;
        const href = (el.getAttribute("href") || el.getAttribute("data-href") || "").toString();
        if (!href) continue;
        const linked = parseLinkedUsername(href);
        if (!linked) continue;
        if (linked === currentUsername) continue;
        const text = norm(el.innerText || el.textContent);
        const aria = norm(el.getAttribute && el.getAttribute("aria-label"));
        const title = norm(el.getAttribute && el.getAttribute("title"));
        const marker = [text, aria, title].find((v) => v && v.length > 0) || linked;
        const markerLower = normLower(marker);
        // Ignore common mention-style links; they do not necessarily indicate reshared content.
        if (markerLower.startsWith("@")) continue;
        if (markerLower.includes("mention")) continue;
        out.has_external_profile_link = true;
        out.linked_username = linked;
        out.linked_profile_url = href;
        out.marker_text = marker;
        return out;
      }
      return out;
    JS
    return empty_context unless payload.is_a?(Hash)

    {
      has_external_profile_link: ActiveModel::Type::Boolean.new.cast(payload["has_external_profile_link"]),
      linked_username: payload["linked_username"].to_s,
      linked_profile_url: payload["linked_profile_url"].to_s,
      marker_text: payload["marker_text"].to_s
    }
  rescue StandardError
    # Best-effort detection: any driver or script failure reads as "no external link".
    empty_context
  end
  # Derives an ad hint from a media URL.
  #
  # First checks the lower-cased URL for known substrings; if none match,
  # decodes the base64 "efg" query parameter and checks the decoded text.
  #
  # @param url [#to_s] media URL
  # @return [Hash, nil] { marker:, confidence: } ("high" or "low"), or nil when
  #   nothing matches, the URL is blank, or any error is raised
  def ad_hint_from_media_url(url)
    raw_url = url.to_s.strip
    return nil if raw_url.blank?

    lowered = raw_url.downcase
    # Order matters: the first matching fragment wins.
    url_markers = [
      ["_nc_ad=", "_nc_ad_query", "low"],
      ["ad_image", "ad_image_marker", "high"],
      ["ads_image", "ads_image_marker", "high"],
      ["ad_urlgen", "ad_urlgen_marker", "high"],
      ["page_instagram_web_story", "page_instagram_web_story_marker", "low"]
    ]
    hit = url_markers.find { |fragment, _marker, _confidence| lowered.include?(fragment) }
    return { marker: hit[1], confidence: hit[2] } if hit

    efg_param = Rack::Utils.parse_query(URI.parse(raw_url).query.to_s)["efg"].to_s
    return nil if efg_param.blank?

    payload = decode_urlsafe_base64(efg_param)
    return nil if payload.blank?

    payload_text = payload.downcase
    efg_markers = [
      ["ad_image", "efg_ad_image", "high"],
      ["ads_image", "efg_ads_image", "high"],
      ["ad_urlgen", "efg_ad_urlgen", "high"],
      ["page_instagram_web_story", "efg_page_instagram_web_story", "low"]
    ]
    efg_hit = efg_markers.find { |fragment, _marker, _confidence| payload_text.include?(fragment) }
    efg_hit ? { marker: efg_hit[1], confidence: efg_hit[2] } : nil
  rescue StandardError
    nil
  end
  # Decodes a URL-safe base64 string ("-"/"_" alphabet, padding optional).
  #
  # @param value [#to_s] encoded text
  # @return [String, nil] decoded bytes, or nil if decoding raises
  def decode_urlsafe_base64(value)
    standard = value.to_s.tr("-_", "+/")
    # Restore the "=" padding that URL-safe encoders usually strip.
    padding = "=" * ((-standard.length) % 4)
    Base64.decode64(standard + padding)
  rescue StandardError
    nil
  end
  # Casts +value+ with ActiveModel's boolean type and returns the result.
  def bool(value)
    boolean_type = ActiveModel::Type::Boolean.new
    boolean_type.cast(value)
  end
  # Maps a SameSite token (any case, surrounding whitespace ignored) to its
  # canonical spelling.
  #
  # @param value [#to_s] e.g. "lax", "STRICT", "no_restriction"
  # @return [String, nil] "Lax", "Strict" or "None"; nil for blank or unknown tokens
  def normalize_same_site(value)
    canonical = {
      "lax" => "Lax",
      "strict" => "Strict",
      "none" => "None",
      "no_restriction" => "None"
    }
    # Blank and unrecognized tokens simply miss the table and yield nil.
    canonical[value.to_s.strip.downcase]
  end
  # Reports whether the current page looks like a logged-out page.
  #
  # Checks the lower-cased page source for two known markers and, only if
  # neither is present, looks for a username input field.
  #
  # @param driver [#page_source, #find_elements]
  # @return [Boolean] false when any error is raised
  def logged_out_page?(driver)
    html = driver.page_source.to_s.downcase
    text_markers = ["create an account or log in to instagram", "\"is_logged_in\":false"]
    return true if text_markers.any? { |marker| html.include?(marker) }

    driver.find_elements(css: "input[name='username']").any?
  rescue StandardError
    false
  end
  # Clicks away cookie / "not now" overlays by exact button label.
  #
  # Best-effort: these overlays can prevent story tray elements from being
  # inserted in the DOM. Errors for one label never stop the remaining labels.
  #
  # @param driver [#find_elements]
  # @return [Array<String>] the list of labels that were tried
  def dismiss_common_overlays!(driver)
    labels = [
      "Allow all cookies",
      "Accept all",
      "Only allow essential cookies",
      "Not now",
      "Not Now"
    ]

    labels.each do |label|
      begin
        candidate = driver.find_elements(xpath: "//button[normalize-space()='#{label}']").first
        if candidate&.displayed?
          candidate.click
          sleep(0.3)
        end
      rescue StandardError
        # Ignore and move on to the next label.
      end
    end
  end
  # Clicks +element+ through the browser's JavaScript engine.
  #
  # The script scrolls the element into view (ignoring failures) and returns
  # true when el.click() did not throw, false otherwise or when the element
  # is missing.
  #
  # @return whatever driver.execute_script returns for the script
  def js_click(driver, element)
    click_script = <<~JS
      const el = arguments[0];
      if (!el) return false;
      try { el.scrollIntoView({ block: "center", inline: "nearest" }); } catch (e) {}
      try { el.click(); return true; } catch (e) {}
      return false;
    JS
    driver.execute_script(click_script, element)
  end
  # Reads every key/value pair from a browser storage object on +window+.
  #
  # @param driver [#execute_script]
  # @param storage_name [String, Symbol] property name on window, e.g. "localStorage"
  # @return [Array<Hash>] entries with string keys "key" and "value";
  #   [] when the script or the driver raises
  def read_web_storage(driver, storage_name)
    # Coerce to String before quoting: Symbol#inspect would emit `:localStorage`,
    # which is not valid JavaScript and would silently yield [].
    storage_key = storage_name.to_s.inspect
    script = <<~JS
      const s = window[#{storage_key}];
      const out = [];
      for (let i = 0; i < s.length; i++) {
        const k = s.key(i);
        out.push({ key: k, value: s.getItem(k) });
      }
      return out;
    JS
    driver.execute_script(script).map { |entry| entry.transform_keys(&:to_s) }
  rescue StandardError
    []
  end
  # Writes key/value entries into a browser storage object on +window+.
  #
  # Entries may use string or symbol "key"/"value" keys; entries without a
  # present key are dropped. Individual setItem failures are ignored in JS.
  #
  # @param driver [#execute_script]
  # @param storage_name [String, Symbol] property name on window, e.g. "sessionStorage"
  # @param entries [Array<#to_h>, nil]
  # @return the script result (number of entries passed to the script), or nil on error
  def write_web_storage(driver, storage_name, entries)
    safe_entries = Array(entries).filter_map do |entry|
      pair = entry.to_h
      key = pair["key"] || pair[:key]
      next unless key.present?

      { "key" => key, "value" => pair["value"] || pair[:value] }
    end

    # Coerce to String before quoting: Symbol#inspect would emit `:name`,
    # which is not valid JavaScript and would make the write silently fail.
    storage_key = storage_name.to_s.inspect
    script = <<~JS
      const s = window[#{storage_key}];
      const entries = arguments[0] || [];
      for (const e of entries) {
        try { s.setItem(e.key, e.value); } catch (err) {}
      }
      return entries.length;
    JS
    driver.execute_script(script, safe_entries)
  rescue StandardError
    nil
  end
  # Runs the given block and records a debug capture of the page afterwards.
  #
  # On success a capture with status "ok" is written and the block's value is
  # returned. On StandardError a capture with status "error" (including the
  # error class, message and up to 40 backtrace lines) is written and the
  # error is re-raised.
  #
  # @param driver browser driver handed to capture_task_html
  # @param task_name [String] label for the capture
  # @param meta [Hash] extra metadata stored with the capture
  def with_task_capture(driver:, task_name:, meta: {})
    outcome = yield
    capture_task_html(driver: driver, task_name: task_name, status: "ok", meta: meta)
    outcome
  rescue StandardError => error
    failure_meta = meta.merge(
      error_class: error.class.name,
      error_message: error.message,
      error_backtrace: Array(error.backtrace).take(40)
    )
    capture_task_html(driver: driver, task_name: task_name, status: "error", meta: failure_meta)
    raise
  end
  # Writes debug artifacts (HTML, JSON metadata, PNG screenshot) for a task
  # into a per-day directory under DEBUG_CAPTURE_DIR.
  #
  # Every driver read is best-effort; a failure to write the capture is logged
  # as a warning and never raised to the caller.
  #
  # @param driver browser driver to read page source, URL, title, logs and screenshot from
  # @param task_name [#to_s] label used in the file name and metadata
  # @param status [String] e.g. "ok" or "error"; part of the file name
  # @param meta [Hash] extra metadata merged into the JSON artifact
  def capture_task_html(driver:, task_name:, status:, meta: {})
    # One clock read for directory, file name and metadata so that a capture
    # taken around midnight cannot land in a directory for a different day.
    now = Time.current.utc
    timestamp = now.strftime("%Y%m%dT%H%M%S.%LZ")
    slug = task_name.to_s.downcase.gsub(/[^a-z0-9]+/, "_").gsub(/\A_|_\z/, "")
    root = DEBUG_CAPTURE_DIR.join(now.strftime("%Y%m%d"))
    FileUtils.mkdir_p(root)

    base = "#{timestamp}_#{slug}_#{status}"
    html_path = root.join("#{base}.html")
    json_path = root.join("#{base}.json")
    png_path = root.join("#{base}.png")

    html = begin
      driver.page_source.to_s
    rescue StandardError => e
      "<!-- unable to capture page_source: #{e.class}: #{e.message} -->"
    end

    metadata = {
      timestamp: now.iso8601(3),
      task_name: task_name,
      status: status,
      account_username: @account.username,
      current_url: safe_driver_value(driver) { driver.current_url },
      page_title: safe_driver_value(driver) { driver.title }
    }.merge(meta)

    # Shared reader for driver log types. Not all driver builds support logs,
    # so every step runs inside safe_driver_value and yields nil on failure.
    collect_logs = lambda do |type, limit, &format|
      safe_driver_value(driver) do
        next nil unless driver.respond_to?(:logs)

        types = driver.logs.available_types
        next nil unless types.include?(type) || types.include?(type.to_s)

        driver.logs.get(type).map(&format).last(limit)
      end
    end

    console = collect_logs.call(:browser, 200) do |entry|
      {
        timestamp: entry.timestamp,
        level: entry.level,
        message: entry.message.to_s.byteslice(0, 2000)
      }
    end
    metadata[:browser_console] = console if console.present?

    perf = collect_logs.call(:performance, 300) do |entry|
      { timestamp: entry.timestamp, message: entry.message.to_s.byteslice(0, 20_000) }
    end
    if perf.present?
      metadata[:performance_summary] = summarize_performance_logs(perf)
      metadata[:performance_logs] = filter_performance_logs(perf)
    end

    # Screenshot helps catch transient toasts/overlays that aren't obvious from HTML.
    safe_driver_value(driver) do
      driver.save_screenshot(png_path.to_s)
      true
    end
    metadata[:screenshot] = png_path.to_s if File.exist?(png_path)

    File.write(html_path, html)
    File.write(json_path, JSON.pretty_generate(metadata))
  rescue StandardError => e
    Rails.logger.warn("Failed to write debug capture for #{task_name}: #{e.class}: #{e.message}")
  end
  # Builds a compact summary of network activity from performance log entries.
  #
  # Each entry's message is expected to be a JSON string wrapping an event
  # under "message"; unparsable or unrelated entries are skipped. Only
  # requests/responses whose URL passes interesting_perf_url? are kept, so the
  # JSON artifacts stay readable.
  #
  # @param entries [Array<Hash>] entries with a :message / "message" value
  # @return [Hash] { interesting_request_count:, recent_interesting: } where the
  #   latter holds the last 40 requests joined with their response (if seen),
  #   or { error: } when summarizing raises
  def summarize_performance_logs(entries)
    tracked_requests = []
    responses_by_id = {}
    subject_keys = {
      "Network.requestWillBeSent" => "request",
      "Network.responseReceived" => "response"
    }

    Array(entries).each do |entry|
      payload = entry.is_a?(Hash) ? (entry[:message] || entry["message"]) : nil
      next if payload.blank?

      parsed = begin
        JSON.parse(payload)
      rescue StandardError
        nil
      end
      event = parsed.is_a?(Hash) ? parsed["message"] : nil
      next unless event.is_a?(Hash)

      subject_key = subject_keys[event["method"].to_s]
      next unless subject_key

      params = event["params"].is_a?(Hash) ? event["params"] : {}
      subject = params[subject_key].is_a?(Hash) ? params[subject_key] : {}
      url = subject["url"].to_s
      next if url.blank?
      next unless interesting_perf_url?(url)

      if subject_key == "request"
        tracked_requests << { request_id: params["requestId"], url: url, http_method: subject["method"] }
      else
        responses_by_id[params["requestId"].to_s] = {
          url: url,
          status: subject["status"],
          mime_type: subject["mimeType"]
        }
      end
    end

    recent = tracked_requests.last(40).map do |request|
      request.merge(response: responses_by_id[request[:request_id].to_s])
    end

    { interesting_request_count: tracked_requests.size, recent_interesting: recent }
  rescue StandardError => e
    { error: "#{e.class}: #{e.message}" }
  end
  # Keeps only entries whose raw message mentions a network request, response
  # or loading failure, to avoid huge JSON artifacts.
  #
  # @param entries [Array<Hash>] entries with a :message / "message" value
  # @return [Array<Hash>] at most the last 200 matching entries
  def filter_performance_logs(entries)
    relevant_events = [
      "Network.requestWillBeSent",
      "Network.responseReceived",
      "Network.loadingFailed"
    ]

    kept = Array(entries).select do |entry|
      payload = entry.is_a?(Hash) ? (entry[:message] || entry["message"]) : nil
      !payload.blank? && relevant_events.any? { |event_name| payload.include?(event_name) }
    end
    kept.last(200)
  end
  # True when the URL contains one of the path fragments worth keeping in
  # performance summaries (API, GraphQL, direct messaging, broadcast).
  #
  # @param url [#to_s]
  # @return [Boolean]
  def interesting_perf_url?(url)
    candidate = url.to_s
    ["/api/v1/", "/graphql", "/direct", "direct_v2", "broadcast"].any? do |fragment|
      candidate.include?(fragment)
    end
  end
  # Yields and returns the block's value, or nil if the block raises a
  # StandardError. The +driver+ argument is accepted but not used here.
  def safe_driver_value(driver)
    begin
      yield
    rescue StandardError
      nil
    end
  end
  # Runs the block, retrying it when the browser session was disconnected.
  #
  # Errors that are not disconnects (per disconnected_session_error?) are
  # re-raised immediately; disconnects are re-raised once +max_attempts+ runs
  # have been used. Each retry logs a warning and waits one second.
  #
  # @param label [String] name of the operation, used in the log line
  # @param max_attempts [Integer] total number of runs allowed
  # @return the block's value
  def with_recoverable_session(label:, max_attempts: 2)
    tries = 0
    begin
      tries += 1
      yield
    rescue StandardError => error
      recoverable = disconnected_session_error?(error) && tries < max_attempts
      raise unless recoverable

      Rails.logger.warn("Instagram #{label} recovered from browser disconnect (attempt #{tries}/#{max_attempts}).")
      sleep(1)
      retry
    end
  end
  # True when +error+ indicates the browser session is gone: either an
  # InvalidSessionIdError or a message containing a known disconnect phrase
  # (matched case-insensitively).
  #
  # @param error [Exception]
  # @return [Boolean]
  def disconnected_session_error?(error)
    return true if error.is_a?(Selenium::WebDriver::Error::InvalidSessionIdError)

    text = error.message.to_s.downcase
    [
      "not connected to devtools",
      "session deleted as the browser has closed the connection",
      "disconnected"
    ].any? { |phrase| text.include?(phrase) }
  end
  6431. end
  6432. end

app/services/instagram/client/bulk_message_send_service.rb

0.0% lines covered

100.0% branches covered

102 relevant lines. 0 lines covered and 102 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module Instagram
    class Client
      # Sends the same text message to many usernames: API first, then the
      # browser UI for every username the API could not deliver to.
      #
      # All collaborators are injected as callables so the service holds no
      # browser or persistence logic of its own.
      class BulkMessageSendService
        def initialize(
          with_recoverable_session:,
          with_authenticated_driver:,
          find_profile_for_interaction:,
          dm_interaction_retry_pending:,
          send_direct_message_via_api:,
          mark_profile_dm_state:,
          apply_dm_state_from_send_result:,
          disconnected_session_error:,
          open_dm:,
          send_text_message_from_driver:
        )
          @with_recoverable_session = with_recoverable_session
          @with_authenticated_driver = with_authenticated_driver
          @find_profile_for_interaction = find_profile_for_interaction
          @dm_interaction_retry_pending = dm_interaction_retry_pending
          @send_direct_message_via_api = send_direct_message_via_api
          @mark_profile_dm_state = mark_profile_dm_state
          @apply_dm_state_from_send_result = apply_dm_state_from_send_result
          @disconnected_session_error = disconnected_session_error
          @open_dm = open_dm
          @send_text_message_from_driver = send_text_message_from_driver
        end

        # @param usernames [Array<String>] recipients, processed in order
        # @param message_text [String] text to send; must not be blank
        # @return [Hash] { attempted:, sent:, failed: } where sent + failed == attempted
        # @raise [RuntimeError] "Message cannot be blank"
        # @raise disconnect errors (per disconnected_session_error) are re-raised so the
        #   recoverable-session wrapper can retry the block
        def call(usernames:, message_text:)
          raise "Message cannot be blank" if message_text.to_s.strip.blank?

          # Delivery flags live outside the recoverable block: when the wrapper
          # re-runs the block after a browser disconnect, recipients that were
          # already messaged are skipped instead of receiving a duplicate.
          delivered = Array.new(usernames.length, false)

          with_recoverable_session.call(label: "send_messages") do
            failed = 0
            fallback = []

            usernames.each_with_index do |username, index|
              next if delivered[index]

              begin
                profile = find_profile_for_interaction.call(username: username)
                if dm_interaction_retry_pending.call(profile)
                  failed += 1
                  next
                end

                api_result = send_direct_message_via_api.call(username: username, message_text: message_text)
                if api_result[:sent]
                  mark_profile_dm_state.call(
                    profile: profile,
                    state: "messageable",
                    reason: "api_text_sent",
                    retry_after_at: nil
                  )
                  delivered[index] = true
                else
                  apply_dm_state_from_send_result.call(profile: profile, result: api_result)
                  fallback << [username, index]
                end
              rescue StandardError => e
                raise if disconnected_session_error.call(e)

                # Any other API-path failure gets a second chance through the UI.
                fallback << [username, index]
              end
            end

            if fallback.any?
              with_authenticated_driver.call do |driver|
                fallback.each do |username, index|
                  begin
                    # A DM that cannot be opened is counted as failed via the
                    # "unresolved" reconciliation below.
                    next unless open_dm.call(driver, username)

                    send_text_message_from_driver.call(driver, message_text)
                    profile = find_profile_for_interaction.call(username: username)
                    mark_profile_dm_state.call(
                      profile: profile,
                      state: "messageable",
                      reason: "ui_fallback_sent",
                      retry_after_at: nil
                    )
                    delivered[index] = true
                    sleep(0.8)
                  rescue StandardError => e
                    raise if disconnected_session_error.call(e)

                    failed += 1
                  end
                end
              end
            end

            sent = delivered.count(true)
            # Anything neither sent nor explicitly failed is reported as failed.
            unresolved = usernames.length - sent - failed
            failed += unresolved if unresolved.positive?

            {
              attempted: usernames.length,
              sent: sent,
              failed: failed
            }
          end
        end

        private

        attr_reader :with_recoverable_session,
          :with_authenticated_driver,
          :find_profile_for_interaction,
          :dm_interaction_retry_pending,
          :send_direct_message_via_api,
          :mark_profile_dm_state,
          :apply_dm_state_from_send_result,
          :disconnected_session_error,
          :open_dm,
          :send_text_message_from_driver
      end
    end
  end

app/services/instagram/client/profile_analysis_dataset_service.rb

0.0% lines covered

100.0% branches covered

57 relevant lines. 0 lines covered and 57 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module Instagram
    class Client
      # Assembles the dataset used for profile analysis: profile details plus
      # the posts extracted from the profile feed. All fetching and extraction
      # is delegated to injected callables.
      class ProfileAnalysisDatasetService
        def initialize(
          fetch_profile_details:,
          fetch_web_profile_info:,
          fetch_profile_feed_items_for_analysis:,
          extract_post_for_analysis:,
          enrich_missing_post_comments_via_browser:,
          normalize_username:
        )
          @fetch_profile_details = fetch_profile_details
          @fetch_web_profile_info = fetch_web_profile_info
          @fetch_profile_feed_items_for_analysis = fetch_profile_feed_items_for_analysis
          @extract_post_for_analysis = extract_post_for_analysis
          @enrich_missing_post_comments_via_browser = enrich_missing_post_comments_via_browser
          @normalize_username = normalize_username
        end

        # @param username [String] profile to analyse; normalized before use
        # @param posts_limit [Integer, nil] forwarded to the feed fetcher
        # @param comments_limit [Integer] forwarded to post extraction and comment enrichment
        # @return [Hash] { profile:, posts:, fetched_at:, feed_fetch: } where feed_fetch is
        #   the feed result without its :items
        # @raise [RuntimeError] "Username cannot be blank"
        def call(username:, posts_limit: nil, comments_limit: 8)
          handle = normalize_username.call(username)
          raise "Username cannot be blank" if handle.blank?

          details = fetch_profile_details.call(username: handle)
          user_id = resolve_user_id(fetch_web_profile_info.call(handle), details)

          feed_result = fetch_profile_feed_items_for_analysis.call(
            username: handle,
            user_id: user_id,
            posts_limit: posts_limit
          )

          # Items the extractor rejects (nil/false) are dropped.
          posts = Array(feed_result[:items]).filter_map do |item|
            extract_post_for_analysis.call(item, comments_limit: comments_limit, referer_username: handle)
          end

          enrich_missing_post_comments_via_browser.call(
            username: handle,
            posts: posts,
            comments_limit: comments_limit
          )

          {
            profile: details,
            posts: posts,
            fetched_at: Time.current,
            feed_fetch: feed_result.except(:items)
          }
        end

        private

        attr_reader :fetch_profile_details,
          :fetch_web_profile_info,
          :fetch_profile_feed_items_for_analysis,
          :extract_post_for_analysis,
          :enrich_missing_post_comments_via_browser,
          :normalize_username

        # Prefers the id under data.user in the web profile info; falls back to
        # details[:ig_user_id]. Blank ids count as missing.
        def resolve_user_id(web_info, details)
          user = web_info.is_a?(Hash) ? web_info.dig("data", "user") : nil
          from_web = user.is_a?(Hash) ? user["id"].to_s.strip.presence : nil
          return from_web if from_web

          details.is_a?(Hash) ? details[:ig_user_id].to_s.strip.presence : nil
        end
      end
    end
  end

app/services/instagram/client/session_validation_service.rb

0.0% lines covered

100.0% branches covered

116 relevant lines. 0 lines covered and 116 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module Instagram
    class Client
      # Checks whether the account's stored session still looks authenticated
      # by loading the home page, counting visible "logged in" indicators and,
      # when enough are found, loading the account's own profile page.
      class SessionValidationService
        AUTHENTICATED_SELECTORS = [
          "svg[aria-label='Home']",
          "svg[aria-label='Search']",
          "img[alt*='profile picture']",
          "a[href*='/direct/inbox/']",
          "[aria-label='Settings']",
          ".x9f619",
          ".x78zum5",
          ".x1i10hfl"
        ].freeze

        PROFILE_INDICATORS = [
          "img[alt*='profile picture']",
          "h2",
          "a[href*='/followers/']",
          "a[href*='/following/']"
        ].freeze

        MIN_REQUIRED_INDICATORS = 3

        def initialize(account:, with_driver:, wait_for:, logger: nil, base_url: Client::INSTAGRAM_BASE_URL)
          @account = account
          @with_driver = with_driver
          @wait_for = wait_for
          @logger = logger
          @base_url = base_url
        end

        # @return [Hash] always { valid:, message: }, plus :details when indicators were counted.
        #   Never raises for StandardError; failures are reported as valid: false.
        def call
          return { valid: false, message: "No cookies stored" } if account.cookies.empty?

          with_driver.call(headless: true) do |driver|
            driver.navigate.to(base_url)
            wait_for.call(driver, css: "body", timeout: 12)

            if login_redirect?(driver.current_url)
              return { valid: false, message: "Session expired - redirected to login page" }
            end

            begin
              hits, matched = count_visible_indicators(driver, AUTHENTICATED_SELECTORS)
              if hits >= MIN_REQUIRED_INDICATORS
                return validate_profile_access(driver: driver, authenticated_found: hits, found_selectors: matched)
              end

              insufficient_indicators_result(hits, matched)
            rescue StandardError => error
              { valid: false, message: "Session validation error: #{error.message}" }
            end
          end
        rescue StandardError => error
          { valid: false, message: "Validation failed: #{error.message}" }
        end

        private

        attr_reader :account, :with_driver, :wait_for, :logger, :base_url

        # Result returned when too few home-page indicators are visible.
        def insufficient_indicators_result(hits, matched)
          {
            valid: false,
            message: "Session appears to be invalid - only found #{hits}/#{AUTHENTICATED_SELECTORS.length} authentication indicators",
            details: {
              homepage_indicators: hits,
              required_indicators: MIN_REQUIRED_INDICATORS,
              found_selectors: matched
            }
          }
        end

        # Loads the account's profile page; a redirect to login means the
        # session is invalid, otherwise the session is reported valid together
        # with the number of profile indicators that are visible.
        def validate_profile_access(driver:, authenticated_found:, found_selectors:)
          driver.navigate.to("#{base_url}/#{account.username}/")
          wait_for.call(driver, css: "body", timeout: 8)

          if login_redirect?(driver.current_url)
            return { valid: false, message: "Session invalid - cannot access profile page" }
          end

          profile_hits = PROFILE_INDICATORS.count do |selector|
            visible_element_count(driver: driver, selector: selector).positive?
          rescue StandardError
            false
          end

          {
            valid: true,
            message: "Session is valid and authenticated (found #{authenticated_found}/#{AUTHENTICATED_SELECTORS.length} indicators, #{profile_hits} profile elements)",
            details: {
              homepage_indicators: authenticated_found,
              profile_indicators: profile_hits,
              found_selectors: found_selectors
            }
          }
        end

        # @return [Array(Integer, Array<String>)] number of selectors with at least one
        #   visible element, and "selector (count)" labels for those selectors
        def count_visible_indicators(driver, selectors)
          matched = selectors.filter_map do |selector|
            visible = visible_element_count(driver: driver, selector: selector)
            "#{selector} (#{visible})" if visible.positive?
          rescue StandardError => error
            # Missing/stale elements are expected; anything else is worth a log line.
            logger&.warn("Validation selector error for #{selector}: #{error.message}") unless ignorable_selector_error?(error)
            nil
          end

          [matched.length, matched]
        end

        def visible_element_count(driver:, selector:)
          driver.find_elements(css: selector).count(&:displayed?)
        end

        def ignorable_selector_error?(error)
          Selenium::WebDriver::Error::NoSuchElementError === error ||
            Selenium::WebDriver::Error::StaleElementReferenceError === error
        rescue NameError
          false
        end

        def login_redirect?(url)
          target = url.to_s
          ["/accounts/login/", "/accounts/emailsignup/"].any? { |path| target.include?(path) }
        end
      end
    end
  end

app/services/instagram/client/single_message_send_service.rb

0.0% lines covered

100.0% branches covered

89 relevant lines. 0 lines covered and 89 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module Instagram
    class Client
      # Sends one text message to one username: API first, browser UI as a
      # fallback when the API does not report the message as sent.
      #
      # All collaborators are injected as callables.
      class SingleMessageSendService
        def initialize(
          with_recoverable_session:,
          with_authenticated_driver:,
          with_task_capture:,
          find_profile_for_interaction:,
          dm_interaction_retry_pending:,
          send_direct_message_via_api:,
          mark_profile_dm_state:,
          apply_dm_state_from_send_result:,
          open_dm:,
          send_text_message_from_driver:
        )
          @with_recoverable_session = with_recoverable_session
          @with_authenticated_driver = with_authenticated_driver
          @with_task_capture = with_task_capture
          @find_profile_for_interaction = find_profile_for_interaction
          @dm_interaction_retry_pending = dm_interaction_retry_pending
          @send_direct_message_via_api = send_direct_message_via_api
          @mark_profile_dm_state = mark_profile_dm_state
          @apply_dm_state_from_send_result = apply_dm_state_from_send_result
          @open_dm = open_dm
          @send_text_message_from_driver = send_text_message_from_driver
        end

        # @param username [String] recipient; must not be blank
        # @param message_text [String] text to send; must not be blank
        # @return [true] when the message was sent via API or UI
        # @raise [RuntimeError] for blank input, a pending DM retry window, or
        #   when the DM cannot be opened in the browser
        def call(username:, message_text:)
          # Validate before any send attempt. Previously these checks ran only in
          # the UI fallback, i.e. after the API had already been called with
          # possibly blank input.
          raise "Message cannot be blank" if message_text.to_s.strip.blank?
          raise "Username cannot be blank" if username.to_s.strip.blank?

          with_recoverable_session.call(label: "send_message") do
            profile = find_profile_for_interaction.call(username: username)
            if dm_interaction_retry_pending.call(profile)
              retry_after = profile&.dm_interaction_retry_after_at
              stamp = retry_after&.utc&.iso8601
              raise "DM retry pending for #{username}#{stamp.present? ? " until #{stamp}" : ""}"
            end

            api_result = send_direct_message_via_api.call(username: username, message_text: message_text)
            if api_result[:sent]
              mark_profile_dm_state.call(
                profile: profile,
                state: "messageable",
                reason: "api_text_sent",
                retry_after_at: nil
              )
              return true
            end

            # API path failed: record why, then fall back to the browser UI.
            apply_dm_state_from_send_result.call(profile: profile, result: api_result)

            with_authenticated_driver.call do |driver|
              ok =
                with_task_capture.call(driver: driver, task_name: "dm_open", meta: { username: username }) do
                  open_dm.call(driver, username)
                end
              raise "Unable to open DM for #{username}" unless ok

              with_task_capture.call(
                driver: driver,
                task_name: "dm_send_text",
                meta: {
                  username: username,
                  message_preview: message_text.to_s.strip.byteslice(0, 80),
                  api_fallback_reason: api_result[:reason].to_s
                }
              ) do
                send_text_message_from_driver.call(driver, message_text.to_s, expected_username: username)
              end

              mark_profile_dm_state.call(
                profile: profile,
                state: "messageable",
                reason: "ui_fallback_sent",
                retry_after_at: nil
              )
              sleep(0.6)
              true
            end
          end
        end

        private

        attr_reader :with_recoverable_session,
          :with_authenticated_driver,
          :with_task_capture,
          :find_profile_for_interaction,
          :dm_interaction_retry_pending,
          :send_direct_message_via_api,
          :mark_profile_dm_state,
          :apply_dm_state_from_send_result,
          :open_dm,
          :send_text_message_from_driver
      end
    end
  end

app/services/instagram/client/sync_data_service.rb

0.0% lines covered

100.0% branches covered

64 relevant lines. 0 lines covered and 64 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module Instagram
    class Client
      # Syncs recipients and conversation peers for an account from the users
      # found in the inbox and in the story tray. Collection and eligibility
      # checks are delegated to injected callables.
      class SyncDataService
        def initialize(
          account:,
          with_recoverable_session:,
          with_authenticated_driver:,
          collect_conversation_users:,
          collect_story_users:,
          fetch_eligibility:,
          source_for:
        )
          @account = account
          @with_recoverable_session = with_recoverable_session
          @with_authenticated_driver = with_authenticated_driver
          @collect_conversation_users = collect_conversation_users
          @collect_story_users = collect_story_users
          @fetch_eligibility = fetch_eligibility
          @source_for = source_for
        end

        # @return [Hash] { recipients:, eligible: } counts read from the account after the sync
        def call
          with_recoverable_session.call(label: "sync") do
            with_authenticated_driver.call do |driver|
              inbox_users = collect_conversation_users.call(driver)
              tray_users = collect_story_users.call(driver)

              (inbox_users.keys | tray_users.keys).each do |username|
                # Users with an existing conversation are treated as messageable
                # without an eligibility lookup.
                eligibility =
                  if inbox_users.key?(username)
                    { can_message: true, restriction_reason: nil }
                  else
                    fetch_eligibility.call(driver, username)
                  end

                display_name = inbox_users.dig(username, :display_name) ||
                  tray_users.dig(username, :display_name) ||
                  username

                recipient = account.recipients.find_or_initialize_by(username: username)
                recipient.display_name = display_name
                recipient.source = source_for.call(username, inbox_users, tray_users)
                recipient.story_visible = tray_users.key?(username)
                recipient.can_message = eligibility[:can_message]
                recipient.restriction_reason = eligibility[:restriction_reason]
                recipient.save!

                peer = account.conversation_peers.find_or_initialize_by(username: username)
                peer.display_name = recipient.display_name
                peer.last_message_at = Time.current
                peer.save!
              end

              account.update!(last_synced_at: Time.current)

              {
                recipients: account.recipients.count,
                eligible: account.recipients.eligible.count
              }
            end
          end
        end

        private

        attr_reader :account,
          :with_recoverable_session,
          :with_authenticated_driver,
          :collect_conversation_users,
          :collect_story_users,
          :fetch_eligibility,
          :source_for
      end
    end
  end

app/services/instagram/client/sync_follow_graph_service.rb

0.0% lines covered

100.0% branches covered

86 relevant lines. 0 lines covered and 86 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. module Instagram
  2. class Client
  3. class SyncFollowGraphService
  4. def initialize(
  5. account:,
  6. with_recoverable_session:,
  7. with_authenticated_driver:,
  8. collect_conversation_users:,
  9. collect_story_users:,
  10. collect_follow_list:,
  11. upsert_follow_list:
  12. )
  13. @account = account
  14. @with_recoverable_session = with_recoverable_session
  15. @with_authenticated_driver = with_authenticated_driver
  16. @collect_conversation_users = collect_conversation_users
  17. @collect_story_users = collect_story_users
  18. @collect_follow_list = collect_follow_list
  19. @upsert_follow_list = upsert_follow_list
  20. end
  # Runs a full follow-graph sync for the account.
  #
  # Inside the injected recoverable-session and authenticated-driver wrappers
  # it collects inbox conversation users, story-tray users and both follow
  # lists, resets and re-applies the follow flags in a single transaction,
  # marks profiles seen in the inbox as messageable, records story visibility
  # and finally stamps the account with the sync time.
  #
  # The innermost block evaluates to a Hash of counts (:followers, :following,
  # :mutuals, :conversation_threads, :profiles_total, :story_tray_visible);
  # what `call` itself returns depends on what the injected wrappers return.
  #
  # @raise [RuntimeError] when the account has no username
  def call
    with_recoverable_session.call(label: "sync_follow_graph") do
      with_authenticated_driver.call do |driver|
        if account.username.blank?
          raise "Instagram username must be set on the account before syncing"
        end

        inbox_users = collect_conversation_users.call(driver)
        tray_users = collect_story_users.call(driver)
        follower_map = collect_follow_list.call(driver, list_kind: :followers, profile_username: account.username)
        following_map = collect_follow_list.call(driver, list_kind: :following, profile_username: account.username)

        follower_names = follower_map.keys
        following_names = following_map.keys
        mutual_names = follower_names & following_names

        InstagramProfile.transaction do
          # Start from a clean slate so profiles that dropped out of both lists lose their flags.
          account.instagram_profiles.update_all(following: false, follows_you: false)
          upsert_follow_list.call(follower_map, following_flag: false, follows_you_flag: true)
          upsert_follow_list.call(following_map, following_flag: true, follows_you_flag: false)
          account.instagram_profiles.where(username: mutual_names).update_all(last_synced_at: Time.current)

          # Anyone with a visible inbox thread is treated as messageable.
          account.instagram_profiles.where(username: inbox_users.keys).update_all(
            can_message: true,
            restriction_reason: nil,
            dm_interaction_state: "messageable",
            dm_interaction_reason: "inbox_thread_seen",
            dm_interaction_checked_at: Time.current,
            dm_interaction_retry_after_at: nil
          )
        end

        mark_story_visibility!(story_users: tray_users)
        account.update!(last_synced_at: Time.current)

        {
          followers: follower_names.length,
          following: following_names.length,
          mutuals: mutual_names.length,
          conversation_threads: inbox_users.length,
          profiles_total: account.instagram_profiles.count,
          story_tray_visible: tray_users.length
        }
      end
    end
  end
  60. private
  61. attr_reader :account,
  62. :with_recoverable_session,
  63. :with_authenticated_driver,
  64. :collect_conversation_users,
  65. :collect_story_users,
  66. :collect_follow_list,
  67. :upsert_follow_list
  # Stamps every known profile whose username appears in the story tray.
  #
  # For each key of +story_users+ that matches one of the account's profiles,
  # sets last_story_seen_at, recomputes last-active, saves, and records a
  # "story_seen" event whose external id is keyed by the current UTC date.
  # Usernames without a matching profile are skipped.
  #
  # @param story_users [Hash] keyed by username (only the keys are read)
  def mark_story_visibility!(story_users:)
    seen_at = Time.current
    event_external_id = "story_seen:#{seen_at.utc.to_date.iso8601}"

    story_users.each_key do |username|
      profile = account.instagram_profiles.find_by(username: username)
      next if profile.nil?

      profile.last_story_seen_at = seen_at
      profile.recompute_last_active!
      profile.save!
      profile.record_event!(
        kind: "story_seen",
        external_id: event_external_id,
        occurred_at: nil,
        metadata: { source: "home_story_tray" }
      )
    end
  end
  84. end
  85. end
  86. end

app/services/instagram/profile_analysis_collector.rb

0.0% lines covered

100.0% branches covered

443 relevant lines. 0 lines covered and 443 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "net/http"
  2. require "digest"
  3. require "set"
  4. module Instagram
  5. class ProfileAnalysisCollector
  6. MAX_POST_IMAGE_BYTES = 6 * 1024 * 1024
  7. MAX_POST_VIDEO_BYTES = 80 * 1024 * 1024
  # @param account the Instagram account the sync runs as (also used to build the client)
  # @param profile the profile whose posts and details are collected
  def initialize(account:, profile:)
    @account, @profile = account, profile
    @client = Instagram::Client.new(account: account)
  end
  # Fetches the profile analysis dataset through the client and persists it.
  #
  # Updates the profile from the dataset's :profile details, persists every
  # post, tallies what changed, and optionally flags posts that were not in
  # this capture as deleted (only when at least one post was fetched).
  #
  # @param posts_limit [Integer, nil] forwarded to the client
  # @param comments_limit [Integer] forwarded to the client
  # @param track_missing_as_deleted [Boolean] cast with ActiveModel's boolean type
  # @param sync_source [String] label stored in each post's metadata
  # @param download_media [Boolean] cast with ActiveModel's boolean type
  # @return [Hash] { details:, posts:, summary: } where summary holds the
  #   per-change counts, de-duplicated shortcode lists and the :feed_fetch hash
  def collect_and_persist!(
    posts_limit: nil,
    comments_limit: 8,
    track_missing_as_deleted: false,
    sync_source: "instagram_profile_analysis_dataset",
    download_media: true
  )
    dataset = @client.fetch_profile_analysis_dataset!(
      username: @profile.username,
      posts_limit: posts_limit,
      comments_limit: comments_limit
    )
    synced_at = Time.current
    details = dataset[:profile] || {}
    update_profile_from_details!(details)

    boolean = ActiveModel::Type::Boolean.new
    wants_media = boolean.cast(download_media)
    seen_shortcodes = Set.new
    feed_fetch = dataset[:feed_fetch]
    summary = {
      created_count: 0,
      updated_count: 0,
      unchanged_count: 0,
      restored_count: 0,
      deleted_count: 0,
      created_shortcodes: [],
      updated_shortcodes: [],
      restored_shortcodes: [],
      deleted_shortcodes: [],
      analysis_candidate_shortcodes: [],
      feed_fetch: feed_fetch.is_a?(Hash) ? feed_fetch : {}
    }

    persisted_posts = Array(dataset[:posts]).filter_map do |post_data|
      outcome = persist_profile_post!(
        post_data,
        synced_at: synced_at,
        sync_source: sync_source,
        download_media: wants_media
      )
      next unless outcome

      post = outcome[:post]
      seen_shortcodes << post.shortcode.to_s

      change = outcome[:change]
      case change
      when :created, :restored, :updated
        summary[:"#{change}_count"] += 1
        summary[:"#{change}_shortcodes"] << post.shortcode.to_s
      else
        summary[:unchanged_count] += 1
      end

      summary[:analysis_candidate_shortcodes] << post.shortcode.to_s if outcome[:analysis_required]
      post
    end

    # An empty capture is never treated as "everything was deleted".
    if boolean.cast(track_missing_as_deleted) && seen_shortcodes.any?
      removed = mark_missing_posts_as_deleted!(
        fetched_shortcodes: seen_shortcodes,
        synced_at: synced_at,
        sync_source: sync_source
      )
      summary[:deleted_count] = removed[:count]
      summary[:deleted_shortcodes] = removed[:shortcodes]
    end

    list_keys = %i[
      created_shortcodes updated_shortcodes restored_shortcodes
      deleted_shortcodes analysis_candidate_shortcodes
    ]
    deduplicated = list_keys.to_h { |key| [key, Array(summary[key]).uniq] }

    { details: details, posts: persisted_posts, summary: summary.merge(deduplicated) }
  end
  91. private
  # Copies freshly fetched details onto the profile, keeping the stored value
  # for any field the dataset left blank, then recomputes last-active.
  #
  # @param details [Hash] symbol-keyed profile details from the dataset
  def update_profile_from_details!(details)
    attrs = %i[display_name profile_pic_url ig_user_id bio].to_h do |field|
      [field, details[field].presence || @profile.public_send(field)]
    end
    attrs[:followers_count] = normalize_count(details[:followers_count]) || @profile.followers_count
    attrs[:last_post_at] = details[:last_post_at].presence || @profile.last_post_at

    @profile.update!(attrs)
    @profile.recompute_last_active!
    @profile.save!
  end
  # Converts a count to an Integer when it consists solely of digits.
  #
  # @param value [Object] anything responding to #to_s
  # @return [Integer, nil] nil for blank, signed, decimal or otherwise
  #   non-digit input, and when stringifying the value raises
  def normalize_count(value)
    digits = value.to_s.strip
    digits.match?(/\A\d+\z/) ? digits.to_i : nil
  rescue StandardError
    nil
  end
  # Creates or updates one profile post from a dataset entry.
  #
  # Saves the post with merged metadata (clearing any deleted-from-source
  # markers), optionally syncs its media, syncs its comments, then compares
  # before/after signatures to classify the change and decide whether the post
  # needs (re-)analysis. Posts needing analysis are reset to ai_status "pending".
  #
  # @param post_data [Hash] symbol-keyed post attributes from the dataset
  # @param synced_at [Time] timestamp recorded on the post
  # @param sync_source [#to_s] stored under metadata "source"
  # @param download_media [Boolean] whether to call sync_media!
  # @return [Hash, nil] { post:, change:, analysis_required: } or nil when the
  #   entry has no shortcode; change is :created, :restored, :updated or :unchanged
  def persist_profile_post!(post_data, synced_at:, sync_source:, download_media:)
    shortcode = post_data[:shortcode].to_s.strip
    return nil if shortcode.blank?

    post = @profile.instagram_profile_posts.find_or_initialize_by(shortcode: shortcode)

    # Snapshot state before any attribute is touched.
    signature_before = post_signature(post)
    analysis_signature_before = post_analysis_signature(post)
    was_new = post.new_record?
    was_deleted = post_deleted?(post)

    image_url = post_data[:image_url].to_s.presence
    video_url = post_data[:video_url].to_s.presence
    primary_media_url = post_data[:media_url].presence || post_data[:image_url]

    metadata = (post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}).merge(
      "media_type" => post_data[:media_type],
      "media_id" => post_data[:media_id],
      "post_kind" => post_data[:post_kind],
      "product_type" => post_data[:product_type],
      "media_url" => post_data[:media_url].to_s.presence,
      "media_url_image" => image_url,
      "media_url_video" => video_url,
      "image_url" => image_url,
      "video_url" => video_url,
      "is_repost" => ActiveModel::Type::Boolean.new.cast(post_data[:is_repost]),
      "comments_count_api" => post_data[:comments_count],
      "source" => sync_source.to_s
    )
    # The post is present in this capture, so it is no longer considered deleted.
    %w[deleted_from_source deleted_detected_at deleted_reason].each { |key| metadata.delete(key) }
    metadata["restored_at"] = synced_at.utc.iso8601(3) if was_deleted

    post.assign_attributes(
      instagram_account: @account,
      taken_at: post_data[:taken_at],
      caption: post_data[:caption],
      permalink: post_data[:permalink],
      source_media_url: primary_media_url,
      likes_count: post_data[:likes_count].to_i,
      # Trust whichever is larger: the comments we extracted or the API's count.
      comments_count: [Array(post_data[:comments]).size, post_data[:comments_count].to_i].max,
      last_synced_at: synced_at,
      metadata: metadata
    )
    post.save!

    if download_media
      sync_media!(post: post, media_url: primary_media_url, media_id: post_data[:media_id])
    end
    sync_comments!(
      post: post,
      comments: post_data[:comments],
      expected_comments_count: post_data[:comments_count]
    )

    signature_after = post_signature(post.reload)
    analysis_signature_after = post_analysis_signature(post)

    change =
      if was_new then :created
      elsif was_deleted then :restored
      elsif signature_before != signature_after then :updated
      else :unchanged
      end

    analysis_required =
      was_new ||
      was_deleted ||
      analysis_signature_before != analysis_signature_after ||
      post.ai_status.to_s != "analyzed" ||
      post.analyzed_at.blank?

    already_pending = post.ai_status.to_s == "pending" && post.analyzed_at.blank?
    if analysis_required && !already_pending
      post.update_columns(ai_status: "pending", analyzed_at: nil, updated_at: Time.current)
    end

    { post: post, change: change, analysis_required: analysis_required }
  end
  # Ensures the post has its media attached, downloading only when needed.
  #
  # Skips work when media is already attached and either the media id or the
  # URL fingerprint is unchanged; otherwise tries to reuse a locally cached
  # blob before downloading. Any error is logged and reported as false.
  #
  # NOTE(review): persist_profile_post! saves the incoming media_id into
  # post.metadata before calling this method, so the media-id comparison below
  # appears to hold whenever media is attached and an id is supplied — confirm
  # that is the intended behavior.
  #
  # @param post the profile post record
  # @param media_url [#to_s] source URL of the media
  # @param media_id [#to_s, nil] source media id, when known
  # @return [Boolean] true when media (or its fingerprint) was updated
  def sync_media!(post:, media_url:, media_id: nil)
    io = nil
    url = media_url.to_s.strip
    return false if url.blank?

    incoming_media_id = media_id.to_s.strip
    stored_media_id = (post.metadata.is_a?(Hash) ? post.metadata : {})["media_id"].to_s.strip
    media_attached = post.media.attached?

    same_media_id = incoming_media_id.present? && stored_media_id.present? && incoming_media_id == stored_media_id
    return false if media_attached && same_media_id

    fingerprint = Digest::SHA256.hexdigest(url)
    return false if media_attached && post.media_url_fingerprint.to_s == fingerprint

    return true if attach_media_from_local_cache!(post: post, incoming_media_id: incoming_media_id, fingerprint: fingerprint)

    io, content_type, filename = download_media(url)
    uploaded = ActiveStorage::Blob.create_and_upload!(
      io: io,
      filename: filename,
      content_type: content_type,
      identify: false
    )
    attach_blob_to_post!(post: post, blob: uploaded)
    post.update!(media_url_fingerprint: fingerprint)
    true
  rescue StandardError => e
    Rails.logger.warn("[ProfileAnalysisCollector] media sync failed for shortcode=#{post.shortcode}: #{e.class}: #{e.message}")
    false
  ensure
    io.close if io.respond_to?(:close)
  end
  # Attaches an already-stored blob to the post instead of downloading again.
  #
  # @param post the profile post record
  # @param incoming_media_id [String] media id used to look up a cached blob
  # @param fingerprint [String] fingerprint of the current media URL
  # @return [Boolean] true when the attachment or fingerprint was updated;
  #   false when no valid cached blob exists, nothing needed changing, or an
  #   error occurred (logged)
  def attach_media_from_local_cache!(post:, incoming_media_id:, fingerprint:)
    blob = cached_profile_post_blob(post: post, incoming_media_id: incoming_media_id)
    return false unless blob
    return false unless blob_integrity_for(blob)[:valid]

    already_attached = post.media.attached? && post.media.blob_id == blob.id
    # Same blob and same fingerprint: there is nothing to do.
    return false if already_attached && post.media_url_fingerprint.to_s == fingerprint

    attach_blob_to_post!(post: post, blob: blob) unless already_attached
    post.update!(media_url_fingerprint: fingerprint)
    true
  rescue StandardError => e
    Rails.logger.warn("[ProfileAnalysisCollector] local media cache attach failed for shortcode=#{post.shortcode}: #{e.class}: #{e.message}")
    false
  end
  # Finds a reusable stored blob for the post's media.
  #
  # Lookup order: other profile posts with the same media id, other profile
  # posts with the same shortcode, then feed posts with the same shortcode.
  #
  # The media-id lookup is attempted only when a media id is present:
  # cached_blob_from_profile_posts falls back to a shortcode lookup for a blank
  # id, so calling it unconditionally would run the shortcode query twice.
  #
  # @param post the profile post record
  # @param incoming_media_id [#to_s, nil] media id from the current capture
  # @return the first matching blob, or nil
  def cached_profile_post_blob(post:, incoming_media_id:)
    if incoming_media_id.to_s.present?
      by_media_id = cached_blob_from_profile_posts(post: post, incoming_media_id: incoming_media_id)
      return by_media_id if by_media_id
    end

    by_shortcode = cached_blob_from_profile_posts(post: post, incoming_media_id: nil)
    return by_shortcode if by_shortcode

    cached_blob_from_feed_posts(shortcode: post.shortcode)
  end
  # Looks for a valid media blob on another profile post.
  #
  # Matches by metadata media id when one is given, otherwise by the post's
  # shortcode. Candidates are checked newest first and the first blob passing
  # the integrity check wins.
  #
  # @param post the profile post record (excluded from the search)
  # @param incoming_media_id [#to_s, nil]
  # @return the matching blob, or nil
  def cached_blob_from_profile_posts(post:, incoming_media_id:)
    candidates = InstagramProfilePost.joins(:media_attachment).where.not(id: post.id)
    media_id = incoming_media_id.to_s

    if media_id.present?
      candidates = candidates.where("metadata ->> 'media_id' = ?", media_id)
    else
      shortcode = post.shortcode.to_s.strip
      return nil if shortcode.blank?

      candidates = candidates.where(shortcode: shortcode)
    end

    match = candidates.order(updated_at: :desc, id: :desc).detect do |candidate|
      candidate&.media&.attached? && blob_integrity_for(candidate.media.blob)[:valid]
    end
    match ? match.media.blob : nil
  end
  # Looks for a valid media blob on a feed post with the same shortcode,
  # checking the most recently downloaded candidates first.
  #
  # @param shortcode [#to_s]
  # @return the matching blob, or nil (also nil for a blank shortcode)
  def cached_blob_from_feed_posts(shortcode:)
    code = shortcode.to_s.strip
    return nil if code.blank?

    candidates = InstagramPost
      .joins(:media_attachment)
      .where(shortcode: code)
      .order(media_downloaded_at: :desc, id: :desc)

    match = candidates.detect do |candidate|
      candidate&.media&.attached? && blob_integrity_for(candidate.media.blob)[:valid]
    end
    match ? match.media.blob : nil
  end
  # Replaces the post's stored comments with the freshly captured ones.
  #
  # At most the first 20 captured comments are considered. Nothing is written
  # when the captured comments already match what is stored. When the capture
  # holds no comments, stored comments are kept unless the source reports a
  # non-positive comment count.
  #
  # The delete-and-recreate step runs inside a transaction so that a failing
  # create! rolls back the delete instead of leaving the post with a partial
  # (or empty) comment list.
  #
  # @param post the profile post record
  # @param comments [Array<Hash>, nil] entries with :text, :author_username, :created_at
  # @param expected_comments_count [#to_i] comment count reported by the source
  def sync_comments!(post:, comments:, expected_comments_count:)
    entries = Array(comments).first(20)
    normalized_entries = entries.filter_map do |c|
      body = c[:text].to_s.strip
      next if body.blank?

      [c[:author_username].to_s.strip.presence, body, c[:created_at]&.to_i]
    end
    existing_entries = post.instagram_profile_post_comments.order(:id).map do |comment|
      [comment.author_username.to_s.strip.presence, comment.body.to_s.strip, comment.commented_at&.to_i]
    end
    return if normalized_entries == existing_entries && normalized_entries.any?

    if entries.empty?
      # Keep previously captured comments when this sync could not fetch them.
      # Only clear if the source explicitly reports no comments.
      post.instagram_profile_post_comments.delete_all if expected_comments_count.to_i <= 0
      return
    end

    post.transaction do
      post.instagram_profile_post_comments.delete_all
      entries.each do |c|
        body = c[:text].to_s.strip
        next if body.blank?

        post.instagram_profile_post_comments.create!(
          instagram_profile: @profile,
          author_username: c[:author_username].to_s.strip.presence,
          body: body,
          commented_at: c[:created_at],
          metadata: { source: "instagram_feed_preview" }
        )
      end
    end
  end
  # Downloads a media file over HTTP(S), following a bounded number of redirects.
  #
  # The response body is streamed and the size limit is enforced while reading,
  # so an oversized response is aborted instead of being buffered in full
  # before the check. The limit depends on the response content type (video vs.
  # everything else).
  #
  # @param url [String] absolute http/https URL
  # @param redirects_left [Integer] remaining redirects allowed
  # @return [Array(StringIO, String, String)] binary IO, content type, filename
  # @raise [RuntimeError] for a non-HTTP URL, too many or invalid redirects, a
  #   non-success status, an empty, oversized or HTML payload, or a payload
  #   whose leading bytes do not match its content type
  def download_media(url, redirects_left: 4)
    uri = URI.parse(url)
    raise "invalid media URL" unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)

    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = (uri.scheme == "https")
    http.open_timeout = 10
    http.read_timeout = 30

    req = Net::HTTP::Get.new(uri.request_uri)
    req["Accept"] = "*/*"
    req["User-Agent"] = @account.user_agent.presence || "Mozilla/5.0"
    req["Referer"] = Instagram::Client::INSTAGRAM_BASE_URL

    redirect_location = nil
    content_type = nil
    body = String.new(encoding: Encoding::BINARY)

    http.request(req) do |res|
      if res.is_a?(Net::HTTPRedirection) && res["location"].present?
        redirect_location = res["location"]
        next
      end
      raise "media download failed: HTTP #{res.code}" unless res.is_a?(Net::HTTPSuccess)

      content_type = res["content-type"].to_s.split(";").first.presence || "application/octet-stream"
      size_limit = content_type.start_with?("video/") ? MAX_POST_VIDEO_BYTES : MAX_POST_IMAGE_BYTES

      # Stop reading as soon as the payload exceeds the cap.
      res.read_body do |chunk|
        body << chunk
        raise "media too large" if body.bytesize > size_limit
      end
    end

    if redirect_location
      raise "too many redirects" if redirects_left.to_i <= 0

      redirected_url = normalize_redirect_url(base_uri: uri, location: redirect_location)
      raise "invalid redirect URL" if redirected_url.blank?

      return download_media(redirected_url, redirects_left: redirects_left.to_i - 1)
    end

    raise "empty media payload" if body.bytesize <= 0
    raise "unexpected html payload" if html_payload?(body)

    validate_known_signature!(body: body, content_type: content_type)
    ext = extension_for_content_type(content_type: content_type)
    io = StringIO.new(body)
    io.set_encoding(Encoding::BINARY) if io.respond_to?(:set_encoding)
    [io, content_type, "profile_post_#{Digest::SHA256.hexdigest(url)[0, 12]}.#{ext}"]
  end
  # Resolves a redirect Location header against the URL that produced it.
  #
  # @param base_uri [#to_s] the URI that was requested
  # @param location [#to_s] the Location header value (absolute or relative)
  # @return [String, nil] the absolute http/https URL, or nil when the target
  #   is malformed or uses another scheme
  def normalize_redirect_url(base_uri:, location:)
    resolved = URI.parse(URI.join(base_uri.to_s, location.to_s).to_s)
    case resolved
    when URI::HTTP, URI::HTTPS then resolved.to_s
    end
  rescue URI::InvalidURIError, ArgumentError
    nil
  end
  # Picks a file extension from a content type by substring match.
  # Entries are tried in order; the first match wins.
  #
  # @param content_type [String]
  # @return [String] "jpg", "png", "webp", "gif", "mp4", "mov", or "bin" when nothing matches
  def extension_for_content_type(content_type:)
    [
      %w[jpeg jpg],
      %w[png png],
      %w[webp webp],
      %w[gif gif],
      %w[mp4 mp4],
      %w[quicktime mov]
    ].each do |needle, extension|
      return extension if content_type.include?(needle)
    end
    "bin"
  end
  # Checks that a blob looks usable before it is reused.
  #
  # A blob must exist and report a positive byte size. When its service exposes
  # path_for (public or private), the file must also exist on disk, be
  # non-empty and match the recorded byte size. Services without path_for skip
  # the on-disk checks.
  #
  # @param blob the blob to inspect, or nil
  # @return [Hash] { valid: Boolean, reason: String or nil }; any error raised
  #   during the check yields valid: false with an "integrity_check_error" reason
  def blob_integrity_for(blob)
    failure = ->(reason) { { valid: false, reason: reason } }

    return failure.call("missing_blob") unless blob

    expected_size = blob.byte_size.to_i
    return failure.call("non_positive_byte_size") if expected_size <= 0

    service = blob.service
    if service.respond_to?(:path_for, true)
      path = service.send(:path_for, blob.key)
      return failure.call("missing_file_on_disk") unless path && File.exist?(path)

      actual_size = File.size(path)
      return failure.call("zero_byte_file") if actual_size <= 0
      return failure.call("byte_size_mismatch") if expected_size.positive? && actual_size != expected_size
    end

    { valid: true, reason: nil }
  rescue StandardError => e
    { valid: false, reason: "integrity_check_error: #{e.class}" }
  end
  # Detects an HTML document by inspecting the first 4096 bytes of the payload,
  # case-insensitively.
  #
  # @param body [#to_s]
  # @return [Boolean] true when the head starts with an HTML doctype or contains an <html tag
  def html_payload?(body)
    head = body.to_s.byteslice(0, 4096).to_s.downcase
    head.start_with?("<!doctype html") || head.include?("<html")
  end
  # Verifies that the payload's leading bytes match its declared content type.
  #
  # Blank and octet-stream content types are not checked; neither are types
  # other than jpeg, png, gif, webp and video/*.
  #
  # @param body [String] binary payload
  # @param content_type [#to_s]
  # @return [nil]
  # @raise [RuntimeError] when the signature does not match the declared type
  def validate_known_signature!(body:, content_type:)
    type = content_type.to_s.downcase
    return if type.blank? || type.include?("octet-stream")

    if type.include?("jpeg")
      raise "invalid jpeg signature" unless body.start_with?("\xFF\xD8".b)
    elsif type.include?("png")
      raise "invalid png signature" unless body.start_with?("\x89PNG\r\n\x1A\n".b)
    elsif type.include?("gif")
      raise "invalid gif signature" unless body.start_with?("GIF87a".b, "GIF89a".b)
    elsif type.include?("webp")
      # RIFF container: "RIFF" <4-byte size> "WEBP"
      riff_webp = body.bytesize >= 12 && body.byteslice(0, 4) == "RIFF" && body.byteslice(8, 4) == "WEBP"
      raise "invalid webp signature" unless riff_webp
    elsif type.start_with?("video/")
      # The payload must carry "ftyp" at byte offset 4.
      raise "invalid video signature" unless body.bytesize >= 12 && body.byteslice(4, 4) == "ftyp"
    end
  end
  # Points the post's media attachment at the given blob.
  #
  # An existing attachment record is re-pointed in place (only when it
  # references a different blob); otherwise the blob is attached fresh.
  #
  # @param post the profile post record
  # @param blob the blob to attach
  # @raise [RuntimeError] when blob is nil
  def attach_blob_to_post!(post:, blob:)
    raise "missing blob for attach" unless blob

    return post.media.attach(blob) unless post.media.attached? && post.media.attachment.present?

    current = post.media.attachment
    current.update!(blob: blob) unless current.blob_id == blob.id
    nil
  end
  # Flags the profile's posts that were absent from the latest capture.
  #
  # Posts already flagged as deleted are left untouched. Each newly flagged
  # post gets deleted-from-source markers in its metadata and a fresh
  # last_synced_at.
  #
  # @param fetched_shortcodes [#to_a] shortcodes present in this capture
  # @param synced_at [Time]
  # @param sync_source [#to_s] stored under metadata "source"
  # @return [Hash] { count: Integer, shortcodes: Array<String> } of newly flagged posts
  def mark_missing_posts_as_deleted!(fetched_shortcodes:, synced_at:, sync_source:)
    flagged = []
    absent_posts = @profile.instagram_profile_posts.where.not(shortcode: fetched_shortcodes.to_a)

    absent_posts.find_each do |post|
      next if post_deleted?(post)

      base = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
      marked = base.merge(
        "deleted_from_source" => true,
        "deleted_detected_at" => synced_at.utc.iso8601(3),
        "deleted_reason" => "missing_from_latest_capture",
        "source" => sync_source.to_s
      )
      post.update!(metadata: marked, last_synced_at: synced_at)
      flagged << post.shortcode.to_s
    end

    { count: flagged.length, shortcodes: flagged }
  end
  # Reports whether the post's metadata carries the "deleted_from_source" flag.
  # The stored value is cast with ActiveModel's boolean type; non-Hash metadata
  # is treated as having no flag.
  def post_deleted?(post)
    flag = post.metadata.is_a?(Hash) ? post.metadata["deleted_from_source"] : nil
    ActiveModel::Type::Boolean.new.cast(flag)
  end
  # Builds a comparable snapshot of every post field whose change counts as an
  # "update" (content, counts, media identity and the deleted flag).
  #
  # @param post the profile post record
  # @return [Hash] normalized field values keyed by symbol
  def post_signature(post)
    meta = post.metadata.is_a?(Hash) ? post.metadata : {}
    deleted_flag = ActiveModel::Type::Boolean.new.cast(meta["deleted_from_source"])

    {
      shortcode: post.shortcode.to_s,
      taken_at: post.taken_at&.utc&.iso8601(3),
      caption: post.caption.to_s,
      permalink: post.permalink.to_s,
      source_media_url: post.source_media_url.to_s,
      likes_count: post.likes_count.to_i,
      comments_count: post.comments_count.to_i,
      media_url_fingerprint: post.media_url_fingerprint.to_s,
      media_id: meta["media_id"].to_s,
      media_type: meta["media_type"].to_s,
      deleted_from_source: deleted_flag
    }
  end
  # Builds a comparable snapshot of only the fields that affect analysis
  # (caption, timing and media identity); engagement counts are excluded.
  #
  # @param post the profile post record
  # @return [Hash] normalized field values keyed by symbol
  def post_analysis_signature(post)
    meta = post.metadata.is_a?(Hash) ? post.metadata : {}

    signature = {
      shortcode: post.shortcode.to_s,
      taken_at: post.taken_at&.utc&.iso8601(3)
    }
    %i[caption source_media_url media_url_fingerprint].each do |field|
      signature[field] = post.public_send(field).to_s
    end
    %w[media_id media_type].each do |key|
      signature[key.to_sym] = meta[key].to_s
    end
    signature
  end
  444. end
  445. end

app/services/instagram/profile_scan_policy.rb

0.0% lines covered

100.0% branches covered

209 relevant lines. 0 lines covered and 209 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. module Instagram
  2. class ProfileScanPolicy
  3. DEFAULT_MAX_FOLLOWERS = 20_000
  4. EXCLUDED_SCAN_TAG = "profile_scan_excluded".freeze
  5. PERSONAL_OVERRIDE_TAGS = %w[personal_user friend female_friend male_friend relative].freeze
  6. NON_PERSONAL_PAGE_KEYWORDS = %w[
  7. meme memes
  8. quote quotes
  9. facts fact
  10. news updates
  11. media entertainment
  12. viral humor funny
  13. giveaway deals
  14. shop store brand
  15. fanpage
  16. ].freeze
  17. NON_PERSONAL_PAGE_USERNAME_HINTS = %w[
  18. official
  19. store shop brand media news magazine
  20. fanpage memes meme quotes facts
  21. deals promo sale
  22. clips reposts updates daily
  23. business company agency studio
  24. ].freeze
  25. NON_PERSONAL_BIO_CTA_HINTS = [
  26. "link in bio",
  27. "dm for collab",
  28. "dm for promo",
  29. "for business inquiries",
  30. "order now",
  31. "shop now",
  32. "customer care",
  33. "whatsapp",
  34. "telegram",
  35. "booking"
  36. ].freeze
  37. NON_PERSONAL_CATEGORY_PATTERN = /\b(media|news|entertainment|publisher|brand|store|shop|business|company|organization|community|product\/service)\b/.freeze
  # Follower ceiling above which profiles are not scanned.
  #
  # @return [Integer] the configured profile_scan_max_followers when it parses
  #   to a positive integer, otherwise DEFAULT_MAX_FOLLOWERS
  def self.max_followers_threshold
    value = parse_integer(Rails.application.config.x.instagram.profile_scan_max_followers)
    value.to_i.positive? ? value : DEFAULT_MAX_FOLLOWERS
  end
  # Evaluates the policy using only the stored profile (no fetched details).
  #
  # @param profile the cached profile record
  # @return [Boolean, nil] the decision's skip_scan flag cast to a boolean;
  #   false when evaluation raises
  def self.skip_from_cached_profile?(profile:)
    verdict = new(profile: profile).decision
    ActiveModel::Type::Boolean.new.cast(verdict[:skip_scan])
  rescue StandardError
    false
  end
  # Builds the analysis payload recorded on a post skipped by this policy.
  #
  # Reads the decision with either symbol or string keys and falls back to
  # generic reason text and the global follower threshold. Nil values are
  # dropped from the result.
  #
  # @param decision [Hash, Object] a policy decision; non-Hash input is treated as empty
  # @return [Hash] string-keyed payload
  def self.build_skip_post_analysis_payload(decision:)
    data = decision.is_a?(Hash) ? decision : {}
    text_for = ->(key) { data[key].to_s.presence || data[key.to_s].to_s.presence }

    {
      "skipped" => true,
      "policy" => "profile_scan_policy_v1",
      "reason_code" => text_for.call(:reason_code) || "scan_policy_blocked",
      "reason" => text_for.call(:reason) || "Post analysis skipped by profile scan policy.",
      "followers_count" => parse_integer(data[:followers_count] || data["followers_count"]),
      "max_allowed_followers" => parse_integer(data[:max_followers] || data["max_followers"]) || max_followers_threshold,
      "decided_at" => Time.current.iso8601
    }.compact
  end
  # Marks a post as analyzed-by-policy, merging the skip payload into any
  # analysis data already stored on it.
  #
  # @param post the post record to update
  # @param decision [Hash] the policy decision that caused the skip
  def self.mark_post_analysis_skipped!(post:, decision:)
    prior_analysis = post.analysis.is_a?(Hash) ? post.analysis.deep_dup : {}
    skip_payload = build_skip_post_analysis_payload(decision: decision)

    post.update!(
      ai_status: "analyzed",
      analyzed_at: Time.current,
      ai_provider: "policy",
      ai_model: "profile_scan_policy_v1",
      analysis: prior_analysis.merge(skip_payload)
    )
  end
  # Tags the profile as scan-excluded, creating the tag if needed.
  # Does nothing when the profile already carries the tag.
  def self.mark_scan_excluded!(profile:)
    tag = ProfileTag.find_or_create_by!(name: EXCLUDED_SCAN_TAG)
    tags = profile.profile_tags
    tags << tag unless tags.exists?(id: tag.id)
  end
  # Removes the scan-excluded tag from the profile.
  # Does nothing when the tag does not exist or is not on the profile.
  def self.clear_scan_excluded!(profile:)
    tag = ProfileTag.find_by(name: EXCLUDED_SCAN_TAG)
    return if tag.nil?

    tags = profile.profile_tags
    tags.destroy(tag) if tags.exists?(id: tag.id)
  end
  # @param profile the stored profile record (may be nil)
  # @param profile_details [Hash, nil] freshly fetched details; keys are
  #   deep-symbolized, and non-Hash input is treated as empty
  # @param max_followers [Integer, String, nil] per-instance ceiling; falls
  #   back to the class-level threshold when it does not parse as an integer
  def initialize(profile:, profile_details: nil, max_followers: nil)
    @profile = profile
    @profile_details =
      if profile_details.is_a?(Hash)
        profile_details.deep_symbolize_keys
      else
        {}
      end
    @max_followers = self.class.parse_integer(max_followers) || self.class.max_followers_threshold
  end
  # The policy decision for this profile, evaluated once and memoized.
  #
  # @return [Hash] see #build_decision for the keys
  def decision
    return @decision if @decision

    @decision = evaluate
  end
  91. private
  # Applies the policy rules in order and returns the first that matches:
  # follower threshold, scan-excluded tag, non-personal page heuristics.
  # When none match, the scan is allowed.
  #
  # @return [Hash] the decision built by #build_decision
  def evaluate
    followers_count = resolved_followers_count

    reason_code, reason =
      if followers_count.to_i.positive? && followers_count > @max_followers.to_i
        [
          "followers_threshold_exceeded",
          "followers_count #{followers_count} exceeds max allowed #{@max_followers}."
        ]
      elsif scan_excluded_tagged?
        ["scan_excluded_tag", "Profile tagged as scan-excluded."]
      elsif non_personal_page?
        [
          "non_personal_profile_page",
          "Profile appears to be a non-personal page (meme/news/info style)."
        ]
      else
        ["scan_allowed", "Profile eligible for scan and post analysis."]
      end

    # Every blocking rule skips both the scan and the post analysis.
    blocked = reason_code != "scan_allowed"
    build_decision(
      skip_scan: blocked,
      skip_post_analysis: blocked,
      reason_code: reason_code,
      reason: reason,
      followers_count: followers_count,
      max_followers: @max_followers
    )
  end
  # Normalizes the decision fields into the Hash shape returned by #decision:
  # boolean flags, string reason fields, the followers count as given and an
  # Integer max_followers.
  def build_decision(skip_scan:, skip_post_analysis:, reason_code:, reason:, followers_count:, max_followers:)
    boolean = ActiveModel::Type::Boolean.new
    {
      skip_scan: boolean.cast(skip_scan),
      skip_post_analysis: boolean.cast(skip_post_analysis),
      reason_code: reason_code.to_s,
      reason: reason.to_s,
      followers_count: followers_count,
      max_followers: max_followers.to_i
    }
  end
  # Best available follower count: the fetched details when positive,
  # otherwise the stored profile value when positive.
  #
  # @return [Integer] 0 when neither source yields a positive integer
  def resolved_followers_count
    count = self.class.parse_integer(@profile_details[:followers_count])
    count = self.class.parse_integer(@profile&.followers_count) unless count.to_i.positive?
    count.to_i.positive? ? count : 0
  end
  # Whether the profile carries the scan-excluded tag.
  def scan_excluded_tagged?
    profile_tag_names.any? { |name| name == EXCLUDED_SCAN_TAG }
  end
  # Heuristically decides whether the profile is a page (meme/news/brand...)
  # rather than a person. Profiles carrying a personal-override tag are never
  # treated as pages.
  #
  # Counts keyword hits over all text fields, username hints over the
  # username/display-name fields and call-to-action hints over the bio, then
  # combines them with the business/professional/verified flags, the category
  # and the presence of an external link.
  #
  # NOTE(review): hits are counted by substring, so overlapping list entries
  # (e.g. "meme" and "memes") each count and a keyword can match inside a
  # longer word — confirm the thresholds are tuned with that in mind.
  #
  # @return [Boolean]
  def non_personal_page?
    return false if personal_override_tagged?

    details = @profile_details
    squash = ->(parts) { parts.map(&:to_s).join(" ").downcase }

    all_text = squash.call([
      @profile&.username,
      @profile&.display_name,
      @profile&.bio,
      details[:username],
      details[:display_name],
      details[:bio],
      details[:category_name]
    ])
    return false if all_text.blank?

    name_text = squash.call([@profile&.username, details[:username], @profile&.display_name, details[:display_name]])
    bio_text = squash.call([@profile&.bio, details[:bio]])

    keyword_hits = NON_PERSONAL_PAGE_KEYWORDS.count { |keyword| all_text.include?(keyword) }
    username_hits = NON_PERSONAL_PAGE_USERNAME_HINTS.count { |keyword| name_text.include?(keyword) }
    cta_hits = NON_PERSONAL_BIO_CTA_HINTS.count { |keyword| bio_text.include?(keyword) }

    boolean = ActiveModel::Type::Boolean.new
    business_like = boolean.cast(details[:is_business_account]) || boolean.cast(details[:is_professional_account])
    verified = boolean.cast(details[:is_verified])
    has_external_link = details[:external_url].to_s.present?
    category_flagged = details[:category_name].to_s.downcase.match?(NON_PERSONAL_CATEGORY_PATTERN)

    [
      business_like && category_flagged,
      business_like && (keyword_hits + username_hits + cta_hits >= 2),
      category_flagged && (keyword_hits + username_hits >= 2),
      keyword_hits >= 3,
      username_hits >= 2 && (cta_hits.positive? || has_external_link),
      verified && category_flagged && keyword_hits.positive?
    ].any?
  end
  # Whether the profile carries any tag marking it as a personal contact.
  def personal_override_tagged?
    (profile_tag_names & PERSONAL_OVERRIDE_TAGS).any?
  end
  # Names of the profile's tags, memoized after the first successful lookup.
  # Uses the already-loaded association when available, otherwise plucks the
  # names with a query.
  #
  # @return [Array<String>] empty when there is no profile or the lookup raises
  def profile_tag_names
    return @profile_tag_names if @profile_tag_names
    return [] unless @profile

    @profile_tag_names =
      if @profile.association(:profile_tags).loaded?
        @profile.profile_tags.map { |tag| tag.name.to_s }
      else
        @profile.profile_tags.pluck(:name)
      end
  rescue StandardError
    []
  end
  # Parses an optionally negative whole number from any value.
  #
  # @param value [Object, nil]
  # @return [Integer, nil] nil for nil, blank or non-integer text, and when
  #   stringifying the value raises
  def self.parse_integer(value)
    return nil if value.nil?

    text = value.to_s.strip
    # Blank text cannot match the pattern, so it falls through to nil as well.
    text.match?(/\A-?\d+\z/) ? text.to_i : nil
  rescue StandardError
    nil
  end
  208. end
  209. end

app/services/instagram_accounts/dashboard_snapshot_service.rb

0.0% lines covered

100.0% branches covered

62 relevant lines. 0 lines covered and 62 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module InstagramAccounts
    # Gathers everything the account dashboard displays into a single Hash.
    class DashboardSnapshotService
      DEFAULT_FAILURE_LIMIT = 25
      DEFAULT_AUDIT_LIMIT = 120
      DEFAULT_ACTION_LIMIT = 20
      DEFAULT_SKIP_WINDOW_HOURS = 72

      # Each limit is coerced with #to_i and clamped to its allowed range.
      #
      # @param account the Instagram account record
      # @param failure_limit [#to_i] recent failures to load (1..200)
      # @param audit_limit [#to_i] audit entries to load (1..500)
      # @param action_limit [#to_i] todo-queue items to load (1..120)
      # @param skip_window_hours [#to_i] skip-diagnostics window in hours (1..168)
      def initialize(
        account:,
        failure_limit: DEFAULT_FAILURE_LIMIT,
        audit_limit: DEFAULT_AUDIT_LIMIT,
        action_limit: DEFAULT_ACTION_LIMIT,
        skip_window_hours: DEFAULT_SKIP_WINDOW_HOURS
      )
        @account = account
        @failure_limit = failure_limit.to_i.clamp(1..200)
        @audit_limit = audit_limit.to_i.clamp(1..500)
        @action_limit = action_limit.to_i.clamp(1..120)
        @skip_window_hours = skip_window_hours.to_i.clamp(1..168)
      end

      # @return [Hash] snapshot keyed by :issues, :metrics, :latest_sync_run,
      #   :recent_failures, :recent_audit_entries, :actions_todo_queue and
      #   :skip_diagnostics
      def call
        latest_sync_run = account.sync_runs.order(created_at: :desc).first
        audit_entries = Ops::AuditLogBuilder.for_account(instagram_account: account, limit: audit_limit)

        {
          issues: Ops::AccountIssues.for(account),
          metrics: Ops::Metrics.for_account(account),
          latest_sync_run: latest_sync_run,
          recent_failures: recent_failures,
          recent_audit_entries: audit_entries,
          actions_todo_queue: actions_todo_queue_summary,
          skip_diagnostics: skip_diagnostics
        }
      end

      private

      attr_reader :account, :failure_limit, :audit_limit, :action_limit, :skip_window_hours

      # Most recent background job failures for the account, newest first.
      def recent_failures
        scope = BackgroundJobFailure.where(instagram_account_id: account.id)
        scope.order(occurred_at: :desc, id: :desc).limit(failure_limit)
      end

      # Fetches the actions todo queue (requesting processing to be enqueued).
      # Any error is reported as an empty queue whose stats carry the message.
      def actions_todo_queue_summary
        queue = Workspace::ActionsTodoQueueService.new(
          account: account,
          limit: action_limit,
          enqueue_processing: true
        )
        queue.fetch!
      rescue StandardError => e
        empty_stats = {
          total_items: 0,
          ready_items: 0,
          processing_items: 0,
          enqueued_now: 0,
          refreshed_at: Time.current.iso8601(3),
          error: e.message.to_s
        }
        { items: [], stats: empty_stats }
      end

      # Skip diagnostics for the configured look-back window.
      def skip_diagnostics
        SkipDiagnosticsService.new(account: account, hours: skip_window_hours).call
      end
    end
  end

app/services/instagram_accounts/llm_comment_request_service.rb

0.0% lines covered

100.0% branches covered

123 relevant lines. 0 lines covered and 123 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. module InstagramAccounts
  2. class LlmCommentRequestService
  3. Result = Struct.new(:payload, :status, keyword_init: true)
  # @param account the Instagram account making the request
  # @param event_id id of the InstagramProfileEvent to comment on
  # @param provider [#to_s] LLM provider name
  # @param model LLM model identifier, passed through unchanged
  # @param status_only [Boolean] when truthy, only report status and never enqueue
  # @param queue_inspector collaborator used to detect stalled generation jobs
  def initialize(account:, event_id:, provider:, model:, status_only:, queue_inspector: LlmQueueInspector.new)
    @account, @event_id, @model = account, event_id, model
    @provider = provider.to_s
    @status_only = ActiveModel::Type::Boolean.new.cast(status_only)
    @queue_inspector = queue_inspector
  end
  # Reports on, or starts, LLM comment generation for the event.
  #
  # Outcomes, in order:
  # - event missing or not accessible to the account -> not-found result
  # - comment already generated -> completed result (status column is repaired if stale)
  # - generation in progress -> in-progress result, unless the queue inspector
  #   considers the job stalled, in which case the event is marked failed and
  #   processing continues
  # - status_only -> status result without enqueuing
  # - otherwise -> a GenerateLlmCommentJob is enqueued and a "queued" result returned
  #
  # The event is loaded with find_by so that an unknown id produces the
  # not-found result. With find, ActiveRecord::RecordNotFound was caught by the
  # blanket rescue below and reported as :unprocessable_entity along with the
  # raw exception message.
  #
  # @return [Result] payload Hash plus an HTTP status symbol; any unexpected
  #   error is returned as :unprocessable_entity with the error message
  def call
    event = InstagramProfileEvent.find_by(id: event_id)
    return not_found_result unless event && accessible_event?(event)

    if event.has_llm_generated_comment?
      event.update_column(:llm_comment_status, "completed") if event.llm_comment_status.to_s != "completed"
      return completed_result(event)
    end

    if event.llm_comment_in_progress?
      if queue_inspector.stale_comment_job?(event: event)
        # Release the stalled job's claim so a new one can be queued below.
        event.update_columns(
          llm_comment_status: "failed",
          llm_comment_last_error: "Previous generation job appears stalled. Please retry.",
          updated_at: Time.current
        )
        event.reload
      else
        return in_progress_result(event)
      end
    end

    return status_result(event) if status_only

    job = GenerateLlmCommentJob.perform_later(
      instagram_profile_event_id: event.id,
      provider: provider,
      model: model,
      requested_by: "dashboard_manual_request"
    )
    event.queue_llm_comment_generation!(job_id: job.job_id)

    Result.new(
      payload: {
        success: true,
        status: "queued",
        event_id: event.id,
        job_id: job.job_id,
        estimated_seconds: llm_comment_estimated_seconds(event: event, include_queue: true),
        queue_size: ai_queue_size
      },
      status: :accepted
    )
  rescue StandardError => e
    Result.new(payload: { error: e.message }, status: :unprocessable_entity)
  end
  53. private
  54. attr_reader :account, :event_id, :provider, :model, :status_only, :queue_inspector
  55. def accessible_event?(event)
  56. event.story_archive_item? && event.instagram_profile&.instagram_account_id == account.id
  57. end
  58. def not_found_result
  59. Result.new(payload: { error: "Event not found or not accessible" }, status: :not_found)
  60. end
  61. def completed_result(event)
  62. Result.new(
  63. payload: {
  64. success: true,
  65. status: "completed",
  66. event_id: event.id,
  67. llm_generated_comment: event.llm_generated_comment,
  68. llm_comment_generated_at: event.llm_comment_generated_at,
  69. llm_comment_model: event.llm_comment_model,
  70. llm_comment_provider: event.llm_comment_provider,
  71. llm_comment_relevance_score: event.llm_comment_relevance_score
  72. },
  73. status: :ok
  74. )
  75. end
  76. def in_progress_result(event)
  77. Result.new(
  78. payload: {
  79. success: true,
  80. status: event.llm_comment_status,
  81. event_id: event.id,
  82. job_id: event.llm_comment_job_id,
  83. estimated_seconds: llm_comment_estimated_seconds(event: event),
  84. queue_size: ai_queue_size
  85. },
  86. status: :accepted
  87. )
  88. end
  89. def status_result(event)
  90. Result.new(
  91. payload: {
  92. success: true,
  93. status: event.llm_comment_status.presence || "not_requested",
  94. event_id: event.id,
  95. estimated_seconds: llm_comment_estimated_seconds(event: event),
  96. queue_size: ai_queue_size
  97. },
  98. status: :ok
  99. )
  100. end
  101. def llm_comment_estimated_seconds(event:, include_queue: false)
  102. base = 18
  103. queue_factor = include_queue ? (ai_queue_size * 4) : 0
  104. attempt_factor = event.llm_comment_attempts.to_i * 6
  105. preprocess_factor = story_local_context_preprocess_penalty(event: event)
  106. (base + queue_factor + attempt_factor + preprocess_factor).clamp(10, 240)
  107. end
  108. def story_local_context_preprocess_penalty(event:)
  109. metadata = event.metadata.is_a?(Hash) ? event.metadata : {}
  110. has_context = metadata["local_story_intelligence"].is_a?(Hash) ||
  111. metadata["ocr_text"].to_s.present? ||
  112. Array(metadata["content_signals"]).any?
  113. return 0 if has_context
  114. media_type = event.media&.blob&.content_type.to_s.presence || metadata["media_content_type"].to_s
  115. media_type.start_with?("image/") ? 16 : 8
  116. rescue StandardError
  117. 0
  118. end
  119. def ai_queue_size
  120. queue_inspector.queue_size
  121. end
  122. end
  123. end

app/services/instagram_accounts/llm_queue_inspector.rb

0.0% lines covered

100.0% branches covered

55 relevant lines. 0 lines covered and 55 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module InstagramAccounts
    # Read-only view of the Sidekiq "ai" queue, used to size the backlog and to
    # decide whether an in-progress LLM comment job has silently disappeared.
    # Every public method degrades to a safe default (0 / false) on error or
    # when the Active Job adapter is not Sidekiq.
    class LlmQueueInspector
      STALE_AFTER = 5.minutes

      # @return [Integer] number of jobs waiting in the "ai" queue (0 on error)
      def queue_size
        return 0 unless sidekiq_adapter?

        require "sidekiq/api"
        Sidekiq::Queue.new("ai").size.to_i
      rescue StandardError
        0
      end

      # An event counts as stale when it is flagged in progress, has not been
      # touched for STALE_AFTER, and no running, queued, retrying or scheduled
      # Sidekiq job refers to it.
      #
      # @return [Boolean] false on any error
      def stale_comment_job?(event:)
        return false unless event.llm_comment_in_progress?
        return false if event.updated_at && event.updated_at > STALE_AFTER.ago
        return false unless sidekiq_adapter?

        require "sidekiq/api"
        matcher = payload_matcher(job_id: event.llm_comment_job_id.to_s, event_id: event.id)
        return false if currently_busy?(matcher)
        return false if queued?(matcher)
        return false if retrying?(matcher)
        return false if scheduled?(matcher)

        true
      rescue StandardError
        false
      end

      private

      def sidekiq_adapter?
        Rails.application.config.active_job.queue_adapter.to_s == "sidekiq"
      end

      # Builds a predicate over a stringified job payload.
      #
      # * A blank job id is ignored: `String#include?("")` is true for every
      #   string, so an event without a recorded job id used to match any job.
      # * The event id must not be followed by another digit, so event 1 does
      #   not match a payload that belongs to event 12.
      # * Both `"key"=>1` / `"key" => 1` (Hash#inspect output, whose spacing
      #   differs between Ruby versions) and `"key":1` (JSON) are recognised.
      def payload_matcher(job_id:, event_id:)
        event_pattern = /instagram_profile_event_id"?\s*(?:=>|:)\s*#{Regexp.escape(event_id.to_s)}(?!\d)/
        lambda do |payload|
          text = payload.to_s
          (!job_id.empty? && text.include?(job_id)) || text.match?(event_pattern)
        end
      end

      def currently_busy?(matcher)
        Sidekiq::Workers.new.any? do |_pid, _tid, work|
          matcher.call(work["payload"])
        end
      end

      def queued?(matcher)
        Sidekiq::Queue.new("ai").any? { |job| matcher.call(job.item) }
      end

      def retrying?(matcher)
        Sidekiq::RetrySet.new.any? { |job| matcher.call(job.item) }
      end

      def scheduled?(matcher)
        Sidekiq::ScheduledSet.new.any? { |job| matcher.call(job.item) }
      end
    end
  end

app/services/instagram_accounts/skip_diagnostics_service.rb

0.0% lines covered

100.0% branches covered

65 relevant lines. 0 lines covered and 65 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module InstagramAccounts
    # Summarises why story events were skipped or failed for an account within
    # a recent time window, grouped by reason and labelled "valid" (an expected
    # skip) or "review" (needs attention).
    class SkipDiagnosticsService
      VALID_REASONS = %w[
        profile_not_in_network
        duplicate_story_already_replied
        invalid_story_media
        interaction_retry_window_active
        missing_auto_reply_tag
        external_profile_link_detected
      ].freeze

      REVIEW_REASONS = %w[
        reply_box_not_found
        comment_submit_failed
        next_navigation_failed
        story_context_missing
        reply_precheck_error
      ].freeze

      # "ad"/"ads" as a whole token (e.g. story_ad_skipped), not as a fragment of
      # words such as "download", "upload", "load" or "bad".
      AD_TOKEN = /(?:\A|[^a-z0-9])ads?(?:[^a-z0-9]|\z)/.freeze

      # Upper bound on the number of events inspected for the per-reason breakdown.
      MAX_SAMPLED_EVENTS = 5_000

      # @param account object responding to `id`
      # @param hours   size of the look-back window in hours (coerced with to_i)
      def initialize(account:, hours:)
        @account = account
        @hours = hours.to_i
      end

      # @return [Hash] `window_hours`, `total` (all matching events) and
      #   `by_reason` (rows sorted by descending count, built from at most
      #   MAX_SAMPLED_EVENTS events). Falls back to an empty summary on error.
      def call
        scope = base_scope
        reason_counts = scope.limit(MAX_SAMPLED_EVENTS).each_with_object(Hash.new(0)) do |event, counts|
          counts[reason_for(event)] += 1
        end

        {
          window_hours: hours,
          total: scope.count,
          by_reason: build_reasons(reason_counts)
        }
      rescue StandardError => e
        # Diagnostics are best-effort, but leave a trace rather than hiding the failure.
        Rails.logger.warn("[skip_diagnostics] failed account_id=#{account&.id}: #{e.class}: #{e.message}")
        { window_hours: hours, total: 0, by_reason: [] }
      end

      private

      attr_reader :account, :hours

      def base_scope
        InstagramProfileEvent
          .joins(:instagram_profile)
          .where(instagram_profiles: { instagram_account_id: account.id })
          .where(kind: %w[story_reply_skipped story_sync_failed story_ad_skipped])
          .where("detected_at >= ?", hours.hours.ago)
      end

      # Prefers the reason stored in the metadata, then the event kind.
      def reason_for(event)
        metadata = event.metadata.is_a?(Hash) ? event.metadata : {}
        metadata["reason"].to_s.presence || event.kind.to_s.presence || "unknown"
      end

      def build_reasons(reason_rows)
        reason_rows
          .sort_by { |_reason, count| -count }
          .map do |reason, count|
            { reason: reason, count: count.to_i, classification: classification_for(reason) }
          end
      end

      def classification_for(reason)
        return "valid" if VALID_REASONS.include?(reason)
        return "review" if REVIEW_REASONS.include?(reason)
        # Advertisement skips are expected. A bare substring test on "ad" used to
        # mark real failures such as "media_download_failed" as valid.
        return "valid" if reason.match?(AD_TOKEN) || reason.include?("sponsored")

        "review"
      end
    end
  end

app/services/instagram_accounts/story_archive_item_serializer.rb

0.0% lines covered

100.0% branches covered

112 relevant lines. 0 lines covered and 112 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module InstagramAccounts
    # Turns one story archive event into the Hash rendered by the archive UI:
    # profile info, media URLs, story metadata and LLM comment state.
    #
    # Side effect: for video media without a resolvable preview image, a
    # preview-generation job is enqueued (throttled through Rails.cache).
    class StoryArchiveItemSerializer
      DEFAULT_PREVIEW_ENQUEUE_TTL_SECONDS = Integer(ENV.fetch("STORY_ARCHIVE_PREVIEW_ENQUEUE_TTL_SECONDS", "900"))

      # @param event the event to serialize
      # @param preview_enqueue_ttl_seconds throttle window for preview job enqueues
      def initialize(event:, preview_enqueue_ttl_seconds: DEFAULT_PREVIEW_ENQUEUE_TTL_SECONDS)
        @event = event
        @preview_enqueue_ttl_seconds = preview_enqueue_ttl_seconds.to_i
      end

      # @return [Hash] symbol-keyed attributes describing the archive item
      def call
        meta = hash_or_empty(event.metadata)
        ownership = extract_ownership_data(metadata: meta, llm_meta: hash_or_empty(event.llm_comment_metadata))
        media_blob = event.media.blob
        owner = event.instagram_profile
        posted_at = meta["upload_time"].presence || meta["taken_at"].presence
        fetched_at = meta["downloaded_at"].presence || event.occurred_at&.iso8601

        {
          id: event.id,
          profile_id: event.instagram_profile_id,
          profile_username: owner&.username.to_s,
          profile_display_name: owner&.display_name.to_s.presence || owner&.username.to_s,
          profile_avatar_url: profile_avatar_url(owner),
          app_profile_url: event.instagram_profile_id ? routes.instagram_profile_path(event.instagram_profile_id) : nil,
          instagram_profile_url: owner&.username.present? ? "https://www.instagram.com/#{owner.username}/" : nil,
          story_posted_at: posted_at,
          downloaded_at: fetched_at,
          media_url: blob_path(event.media),
          media_download_url: blob_path(event.media, disposition: "attachment"),
          media_content_type: media_blob&.content_type.to_s.presence || meta["media_content_type"].to_s,
          media_preview_image_url: media_preview_image_url(metadata: meta),
          video_static_frame_only: StoryArchive::MediaPreviewResolver.static_video_preview?(metadata: meta),
          media_bytes: media_bytes_for(meta, media_blob),
          media_width: meta["media_width"],
          media_height: meta["media_height"],
          story_id: meta["story_id"].to_s,
          story_url: meta["story_url"].to_s.presence || meta["permalink"].to_s.presence,
          reply_comment: meta["reply_comment"].to_s.presence,
          skipped: ActiveModel::Type::Boolean.new.cast(meta["skipped"]),
          skip_reason: meta["skip_reason"].to_s.presence,
          llm_generated_comment: event.llm_generated_comment,
          llm_comment_generated_at: event.llm_comment_generated_at&.iso8601,
          llm_comment_model: event.llm_comment_model,
          llm_comment_provider: event.llm_comment_provider,
          llm_comment_status: event.llm_comment_status,
          llm_comment_attempts: event.llm_comment_attempts,
          llm_comment_last_error: event.llm_comment_last_error,
          llm_comment_last_error_preview: text_preview(event.llm_comment_last_error, max: 180),
          llm_comment_relevance_score: event.llm_comment_relevance_score,
          llm_generated_comment_preview: text_preview(event.llm_generated_comment, max: 260),
          has_llm_comment: event.has_llm_generated_comment?,
          story_ownership_label: ownership["label"].to_s.presence,
          story_ownership_summary: ownership["summary"].to_s.presence,
          story_ownership_confidence: ownership["confidence"]
        }
      end

      private

      attr_reader :event, :preview_enqueue_ttl_seconds

      def routes
        Rails.application.routes.url_helpers
      end

      def hash_or_empty(value)
        value.is_a?(Hash) ? value : {}
      end

      def hash_or_nil(value)
        value if value.is_a?(Hash)
      end

      # First Hash found among the LLM metadata, the event metadata and the
      # validated story insights, in that order; `{}` when none is present.
      # The `||` chain keeps the later lookups lazy.
      def extract_ownership_data(metadata:, llm_meta:)
        hash_or_nil(llm_meta["ownership_classification"]) ||
          hash_or_nil(metadata["story_ownership_classification"]) ||
          hash_or_nil(metadata.dig("validated_story_insights", "ownership_classification")) ||
          {}
      end

      # Byte size recorded in the metadata when positive, else the blob's size.
      def media_bytes_for(meta, media_blob)
        recorded = meta["media_bytes"].to_i
        recorded.positive? ? recorded : media_blob&.byte_size.to_i
      end

      # Path-only blob URL; nil when it cannot be generated.
      def blob_path(attachment, disposition: nil)
        options = disposition.present? ? { only_path: true, disposition: disposition } : { only_path: true }
        routes.rails_blob_path(attachment, **options)
      rescue StandardError
        nil
      end

      def profile_avatar_url(profile)
        return nil unless profile
        return blob_path(profile.avatar) if profile.avatar.attached?

        profile.profile_pic_url.to_s.presence
      end

      def media_preview_image_url(metadata:)
        resolved = StoryArchive::MediaPreviewResolver.preferred_preview_image_url(event: event, metadata: metadata)
        resolved.present? ? resolved : local_video_preview_representation_url
      end

      # Always yields nil; for attached video media it first requests a preview
      # image to be generated in the background.
      def local_video_preview_representation_url
        video_attached = event.media.attached? && event.media.blob&.content_type.to_s.start_with?("video/")
        enqueue_story_preview_generation if video_attached
        nil
      rescue StandardError
        nil
      end

      # The cache entry acts as a throttle: the job is enqueued only when no
      # entry exists for this event yet.
      def enqueue_story_preview_generation
        return if event.preview_image.attached?

        Rails.cache.fetch("story_archive:preview_enqueue:#{event.id}", expires_in: preview_enqueue_ttl_seconds.seconds) do
          GenerateStoryPreviewImageJob.perform_later(instagram_profile_event_id: event.id)
          true
        end
      rescue StandardError => e
        Rails.logger.warn("[story_media_archive] preview enqueue failed event_id=#{event.id}: #{e.class}: #{e.message}")
      end

      # Text cut to `max` characters, with "..." appended when it was shortened.
      def text_preview(raw, max:)
        text = raw.to_s
        text.length > max ? "#{text[0, max]}..." : text
      end
    end
  end

app/services/instagram_accounts/story_archive_query.rb

0.0% lines covered

100.0% branches covered

61 relevant lines. 0 lines covered and 61 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module InstagramAccounts
    # Paginated lookup of an account's story archive events that have media
    # attached, optionally restricted to a single calendar date.
    class StoryArchiveQuery
      DEFAULT_PER_PAGE = 12
      MIN_PER_PAGE = 8
      MAX_PER_PAGE = 40

      Result = Struct.new(:events, :page, :per_page, :total, :has_more, :on, keyword_init: true)

      # @param account  object responding to `id`
      # @param page     requested page (coerced with to_i; values below 1 become 1)
      # @param per_page requested page size (coerced with to_i, then normalised)
      # @param on       optional ISO 8601 date string; unparsable values are ignored
      def initialize(account:, page:, per_page:, on: nil)
        @account = account
        @page = page.to_i
        @per_page = per_page.to_i
        @raw_on = on
      end

      # @return [Result] the page of events plus paging details
      def call
        archive_date = parse_archive_date(raw_on)
        current_page = page < 1 ? 1 : page
        page_size = normalize_per_page

        relation = base_scope
        if archive_date
          relation = relation.where(
            "DATE(COALESCE(instagram_profile_events.occurred_at, instagram_profile_events.detected_at, instagram_profile_events.created_at)) = ?",
            archive_date
          )
        end
        relation = relation.order(detected_at: :desc, id: :desc)

        total = relation.count
        skipped = (current_page - 1) * page_size

        Result.new(
          events: relation.offset(skipped).limit(page_size),
          page: current_page,
          per_page: page_size,
          total: total,
          has_more: (current_page * page_size) < total,
          on: archive_date
        )
      end

      private

      attr_reader :account, :page, :per_page, :raw_on

      def base_scope
        InstagramProfileEvent
          .joins(:instagram_profile)
          .joins(:media_attachment)
          .includes(:instagram_profile)
          .with_attached_media
          .with_attached_preview_image
          .where(
            instagram_profiles: { instagram_account_id: account.id },
            kind: InstagramProfileEvent::STORY_ARCHIVE_EVENT_KINDS
          )
      end

      # Non-positive sizes fall back to the default; everything is then clamped
      # into MIN_PER_PAGE..MAX_PER_PAGE.
      def normalize_per_page
        requested = per_page <= 0 ? DEFAULT_PER_PAGE : per_page
        requested.clamp(MIN_PER_PAGE, MAX_PER_PAGE)
      end

      # @return [Date, nil] nil for blank or unparsable input
      def parse_archive_date(raw)
        text = raw.to_s.strip
        text.blank? ? nil : Date.iso8601(text)
      rescue StandardError
        nil
      end
    end
  end

app/services/instagram_accounts/technical_details_payload_service.rb

0.0% lines covered

100.0% branches covered

62 relevant lines. 0 lines covered and 62 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module InstagramAccounts
    # Assembles the "technical details" payload for one event: LLM comment
    # state, a small story timeline and the stored (or regenerated) technical
    # details of the comment generation.
    #
    # #call always returns a Result with a `payload` Hash and a symbolic HTTP
    # `status`.
    class TechnicalDetailsPayloadService
      Result = Struct.new(:payload, :status, keyword_init: true)

      # @param account  object whose `id` must match the event's owning account
      # @param event_id id of the InstagramProfileEvent to describe
      def initialize(account:, event_id:)
        @account = account
        @event_id = event_id
      end

      # @return [Result] :not_found when the event is missing or belongs to another
      #   account, :ok on success, :unprocessable_entity on unexpected errors.
      def call
        # find_by instead of find: a missing record must answer with the same 404
        # as an inaccessible one. With `find`, RecordNotFound was swallowed by the
        # rescue below and reported as a 422 with the raw ActiveRecord message.
        event = InstagramProfileEvent.find_by(id: event_id)
        return not_found_result unless event && event.instagram_profile&.instagram_account_id == account.id

        llm_meta = event.llm_comment_metadata.is_a?(Hash) ? event.llm_comment_metadata : {}
        stored_details = llm_meta["technical_details"] || llm_meta[:technical_details]
        technical_details = hydrate_technical_details(event: event, technical_details: stored_details)

        Result.new(
          payload: {
            event_id: event.id,
            has_llm_comment: event.has_llm_generated_comment?,
            llm_comment: event.llm_generated_comment,
            generated_at: event.llm_comment_generated_at,
            model: event.llm_comment_model,
            provider: event.llm_comment_provider,
            status: event.llm_comment_status,
            relevance_score: event.llm_comment_relevance_score,
            last_error: event.llm_comment_last_error,
            timeline: story_timeline_for(event: event),
            technical_details: technical_details
          },
          status: :ok
        )
      rescue StandardError => e
        Result.new(payload: { error: e.message }, status: :unprocessable_entity)
      end

      private

      attr_reader :account, :event_id

      def not_found_result
        Result.new(payload: { error: "Event not found or not accessible" }, status: :not_found)
      end

      # Returns the stored details when all three expected sections are Hashes.
      # Otherwise regenerates them from the event and lets the stored values win
      # on conflict. Falls back to the stored details if regeneration fails.
      def hydrate_technical_details(event:, technical_details:)
        current = technical_details.is_a?(Hash) ? technical_details.deep_stringify_keys : {}
        has_required_sections =
          current["local_story_intelligence"].is_a?(Hash) &&
          current["analysis"].is_a?(Hash) &&
          current["prompt_engineering"].is_a?(Hash)
        return current if has_required_sections

        # NOTE(review): these are non-public methods of the event model invoked
        # via `send`; a public entry point on the model would be safer.
        context = event.send(:build_comment_context)
        generated = event.send(:capture_technical_details, context)
        generated_hash = generated.is_a?(Hash) ? generated.deep_stringify_keys : {}
        generated_hash.deep_merge(current)
      rescue StandardError
        current
      end

      def story_timeline_for(event:)
        metadata = event.metadata.is_a?(Hash) ? event.metadata : {}
        story = event.instagram_stories.order(taken_at: :desc, id: :desc).first

        {
          story_posted_at: metadata["upload_time"].presence || metadata["taken_at"].presence || story&.taken_at&.iso8601,
          downloaded_to_system_at: metadata["downloaded_at"].presence || event.occurred_at&.iso8601 || event.created_at&.iso8601,
          event_detected_at: event.detected_at&.iso8601
        }
      end
    end
  end

app/services/instagram_profiles/events_query.rb

0.0% lines covered

100.0% branches covered

72 relevant lines. 0 lines covered and 72 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module InstagramProfiles
    # Filtered, sorted and paginated lookup of a profile's events for the
    # Tabulator-driven events table.
    class EventsQuery
      DEFAULT_PER_PAGE = 25
      MIN_PER_PAGE = 10
      MAX_PER_PAGE = 100

      Result = Struct.new(:events, :total, :pages, keyword_init: true)

      # @param profile   record exposing `instagram_profile_events`
      # @param params    request parameters (`q`, `page`, `per_page` / `size`)
      # @param tabulator parsed Tabulator parameters exposing `filters` and `sorters`
      def initialize(profile:, params:, tabulator: TabulatorParams.new(params: params))
        @profile = profile
        @params = params
        @tabulator = tabulator
      end

      # @return [Result] the requested page of events, total count and page count
      def call
        scope = base_scope
        scope = apply_tabulator_event_filters(scope)

        query = params[:q].to_s.strip
        scope = apply_query(scope, query)
        scope = apply_remote_sort(scope) || scope.order(detected_at: :desc, id: :desc)

        page = normalize_page(params[:page])
        per_page = normalize_per_page(params[:per_page].presence || params[:size].presence)

        total = scope.count
        pages = (total / per_page.to_f).ceil
        rows = scope.offset((page - 1) * per_page).limit(per_page)

        Result.new(events: rows, total: total, pages: pages)
      end

      private

      attr_reader :profile, :params, :tabulator

      def base_scope
        profile.instagram_profile_events.with_attached_media.with_attached_preview_image
      end

      # Only the "kind" column filter is supported; other fields are ignored.
      def apply_tabulator_event_filters(scope)
        tabulator.filters.each do |filter|
          value = filter[:value]
          next if value.blank?
          next unless filter[:field] == "kind"

          scope = scope.where("LOWER(kind) LIKE ?", like_term(value))
        end

        scope
      end

      def apply_query(scope, query)
        return scope if query.blank?

        term = like_term(query)
        scope.where("LOWER(kind) LIKE ? OR LOWER(COALESCE(external_id, '')) LIKE ?", term, term)
      end

      # Case-insensitive "contains" pattern. LIKE metacharacters typed by the
      # user (%, _ and the backslash) are escaped so they match literally
      # instead of acting as wildcards.
      # NOTE(review): relies on backslash being the database's default LIKE
      # escape character -- confirm for the database in use.
      def like_term(raw)
        "%#{ActiveRecord::Base.sanitize_sql_like(raw.to_s.downcase)}%"
      end

      # @return the sorted scope, or nil when no supported sorter was supplied
      def apply_remote_sort(scope)
        first = tabulator.sorters.first
        return nil unless first.respond_to?(:[])

        field = first["field"].to_s
        # Direction is reduced to two literals before it reaches the SQL string.
        dir = first["dir"].to_s.downcase == "desc" ? "DESC" : "ASC"

        case field
        when "kind"
          scope.order(Arel.sql("kind #{dir}, detected_at DESC, id DESC"))
        when "occurred_at"
          scope.order(Arel.sql("occurred_at #{dir} NULLS LAST, detected_at DESC, id DESC"))
        when "detected_at"
          scope.order(Arel.sql("detected_at #{dir}, id #{dir}"))
        end
      end

      def normalize_page(raw_page)
        value = raw_page.to_i
        value.positive? ? value : 1
      end

      def normalize_per_page(raw_per_page)
        value = raw_per_page.to_i
        value = DEFAULT_PER_PAGE if value <= 0
        value.clamp(MIN_PER_PAGE, MAX_PER_PAGE)
      end
    end
  end

app/services/instagram_profiles/mutual_friends_resolver.rb

0.0% lines covered

100.0% branches covered

40 relevant lines. 0 lines covered and 40 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module InstagramProfiles
    # Resolves a profile's mutual friends, as reported by the Instagram client,
    # into profile objects: already stored profiles are reused (with blank
    # display fields filled in memory only), unknown ones are built unsaved.
    class MutualFriendsResolver
      # @param account record exposing `instagram_profiles`
      # @param profile profile whose mutual friends are resolved
      # @param client  object responding to `fetch_mutual_friends(profile_username:, limit:)`
      def initialize(account:, profile:, client: Instagram::Client.new(account: account))
        @account = account
        @profile = profile
        @client = client
      end

      # @param limit [Integer] maximum number of entries requested from the client
      # @return [Array] profile objects in client order, without blanks, without
      #   the profile itself and without duplicates; `[]` on any error
      def call(limit: 36)
        own_username = normalize_username(profile.username)
        entries = client.fetch_mutual_friends(profile_username: profile.username, limit: limit)

        # Normalise once, drop blank/self entries before the lookup, and keep only
        # the first entry per username so a repeated row cannot yield a repeated profile.
        candidates = entries.filter_map do |entry|
          username = normalize_username(entry[:username] || entry["username"])
          next if username.blank? || username == own_username

          [username, entry]
        end.uniq(&:first)

        existing_profiles = account.instagram_profiles
                                   .where(username: candidates.map(&:first))
                                   .with_attached_avatar
                                   .index_by(&:username)

        candidates.map do |username, entry|
          resolve_profile(username: username, entry: entry, existing: existing_profiles[username])
        end
      rescue StandardError => e
        Rails.logger.warn("Failed to resolve mutual friends for profile #{profile&.username}: #{e.class}: #{e.message}")
        []
      end

      private

      attr_reader :account, :profile, :client

      # Fills blank display fields on a stored profile without saving it, or
      # builds a new unsaved profile for an unknown username.
      def resolve_profile(username:, entry:, existing:)
        display_name = entry[:display_name] || entry["display_name"]
        profile_pic_url = entry[:profile_pic_url] || entry["profile_pic_url"]

        unless existing
          return account.instagram_profiles.new(
            username: username,
            display_name: display_name.presence,
            profile_pic_url: profile_pic_url.presence
          )
        end

        existing.display_name = display_name if existing.display_name.blank? && display_name.present?
        existing.profile_pic_url = profile_pic_url if existing.profile_pic_url.blank? && profile_pic_url.present?
        existing
      end

      def normalize_username(value)
        value.to_s.strip.downcase
      end
    end
  end

app/services/instagram_profiles/profiles_index_query.rb

0.0% lines covered

100.0% branches covered

153 relevant lines. 0 lines covered and 153 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module InstagramProfiles
    # Filtered, sorted and paginated lookup of an account's profiles for the
    # profiles index (plain parameters plus Tabulator column filters/sorters).
    class ProfilesIndexQuery
      DEFAULT_PER_PAGE = 50
      MIN_PER_PAGE = 10
      MAX_PER_PAGE = 200

      Result = Struct.new(
        :q,
        :filter,
        :page,
        :per_page,
        :total,
        :pages,
        :profiles,
        keyword_init: true
      )

      # @param account   record exposing `instagram_profiles`
      # @param params    request parameters (`q`, `sort`, `page`, `per_page` / `size`)
      # @param tabulator parsed Tabulator parameters exposing `filters`, `sorters`,
      #   `truthy?` and `parse_tri_bool`
      def initialize(account:, params:, tabulator: TabulatorParams.new(params: params))
        @account = account
        @params = params
        @tabulator = tabulator
      end

      # @return [Result] the requested page of profiles plus the effective
      #   query, filter flags and paging details
      def call
        scope = apply_tabulator_profile_filters(base_scope)

        query = params[:q].to_s.strip
        scope = apply_query(scope, query)

        filter = {
          mutual: tabulator.truthy?(:mutual),
          following: tabulator.truthy?(:following),
          follows_you: tabulator.truthy?(:follows_you),
          can_message: tabulator.truthy?(:can_message)
        }
        scope = apply_filter(scope, filter: filter)
        # A Tabulator sorter, when present and supported, wins over `sort`.
        scope = apply_remote_sort(scope) || apply_sort(scope, params[:sort].to_s)

        page = normalize_page(params[:page])
        per_page = normalize_per_page(params[:per_page].presence || params[:size].presence)

        total = scope.count
        pages = (total / per_page.to_f).ceil
        rows = scope.offset((page - 1) * per_page).limit(per_page)

        Result.new(
          q: query,
          filter: filter,
          page: page,
          per_page: per_page,
          total: total,
          pages: pages,
          profiles: rows
        )
      end

      private

      attr_reader :account, :params, :tabulator

      def base_scope
        account.instagram_profiles
      end

      def apply_tabulator_profile_filters(scope)
        tabulator.filters.each do |filter|
          field = filter[:field]
          value = filter[:value]
          next if value.blank?

          case field
          when "username"
            scope = scope.where("LOWER(username) LIKE ?", like_term(value))
          when "display_name"
            scope = scope.where("LOWER(COALESCE(display_name, '')) LIKE ?", like_term(value))
          when "following"
            parsed = tabulator.parse_tri_bool(value)
            scope = scope.where(following: parsed) unless parsed.nil?
          when "follows_you"
            parsed = tabulator.parse_tri_bool(value)
            scope = scope.where(follows_you: parsed) unless parsed.nil?
          when "mutual"
            parsed = tabulator.parse_tri_bool(value)
            if parsed == true
              scope = scope.where(following: true, follows_you: true)
            elsif parsed == false
              scope = scope.where.not(following: true, follows_you: true)
            end
          when "can_message"
            scope = if value.to_s == "unknown"
                      scope.where(can_message: nil)
                    else
                      parsed = tabulator.parse_tri_bool(value)
                      parsed.nil? ? scope : scope.where(can_message: parsed)
                    end
          end
        end

        scope
      end

      def apply_query(scope, query)
        return scope if query.blank?

        term = like_term(query)
        scope.where("LOWER(username) LIKE ? OR LOWER(display_name) LIKE ?", term, term)
      end

      # Case-insensitive "contains" pattern. LIKE metacharacters typed by the
      # user (%, _ and the backslash) are escaped so they match literally
      # instead of acting as wildcards.
      # NOTE(review): relies on backslash being the database's default LIKE
      # escape character -- confirm for the database in use.
      def like_term(raw)
        "%#{ActiveRecord::Base.sanitize_sql_like(raw.to_s.downcase)}%"
      end

      def apply_filter(scope, filter:)
        scope = scope.where(following: true, follows_you: true) if filter[:mutual]
        scope = scope.where(following: true) if filter[:following]
        scope = scope.where(follows_you: true) if filter[:follows_you]
        scope = scope.where(can_message: true) if filter[:can_message]
        scope
      end

      def apply_sort(scope, sort)
        case sort
        when "username_asc"
          scope.order(Arel.sql("username ASC"))
        when "username_desc"
          scope.order(Arel.sql("username DESC"))
        when "recent_sync"
          scope.order(Arel.sql("last_synced_at DESC NULLS LAST, username ASC"))
        when "messageable"
          scope.order(Arel.sql("can_message DESC NULLS LAST, username ASC"))
        when "recent_active"
          scope.order(Arel.sql("last_active_at DESC NULLS LAST, username ASC"))
        else
          scope.order(Arel.sql("following DESC, follows_you DESC, username ASC"))
        end
      end

      # @return the sorted scope, or nil when no supported sorter was supplied
      def apply_remote_sort(scope)
        first = tabulator.sorters.first
        return nil unless first.respond_to?(:[])

        field = first["field"].to_s
        # Direction is reduced to two literals before it reaches the SQL string.
        dir = first["dir"].to_s.downcase == "desc" ? "DESC" : "ASC"

        case field
        when "username"
          scope.order(Arel.sql("username #{dir}"))
        when "display_name"
          scope.order(Arel.sql("display_name #{dir} NULLS LAST, username ASC"))
        when "following"
          scope.order(Arel.sql("following #{dir}, username ASC"))
        when "follows_you"
          scope.order(Arel.sql("follows_you #{dir}, username ASC"))
        when "mutual"
          scope.order(Arel.sql("following #{dir}, follows_you #{dir}, username ASC"))
        when "can_message"
          scope.order(Arel.sql("can_message #{dir} NULLS LAST, username ASC"))
        when "last_synced_at"
          scope.order(Arel.sql("last_synced_at #{dir} NULLS LAST, username ASC"))
        when "last_active_at"
          scope.order(Arel.sql("last_active_at #{dir} NULLS LAST, username ASC"))
        end
      end

      def normalize_page(raw_page)
        value = raw_page.to_i
        value.positive? ? value : 1
      end

      def normalize_per_page(raw_per_page)
        value = raw_per_page.to_i
        value = DEFAULT_PER_PAGE if value <= 0
        value.clamp(MIN_PER_PAGE, MAX_PER_PAGE)
      end
    end
  end

app/services/instagram_profiles/show_snapshot_service.rb

0.0% lines covered

100.0% branches covered

63 relevant lines. 0 lines covered and 63 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. module InstagramProfiles
  2. class ShowSnapshotService
  3. AVAILABLE_TAGS = %w[personal_user friend female_friend male_friend relative page excluded automatic_reply].freeze
  4. def initialize(account:, profile:, mutual_limit: 36)
  5. @account = account
  6. @profile = profile
  7. @mutual_limit = mutual_limit.to_i
  8. end
  9. def call
  10. posts_scope = profile.instagram_profile_posts
  11. profile_posts_total_count = posts_scope.count
  12. deleted_posts_count = deleted_posts_count_for(posts_scope)
  13. analyzed_posts_count = posts_scope.where(ai_status: "analyzed").count
  14. behavior_profile = profile.instagram_profile_behavior_profile
  15. behavior_metadata = behavior_profile&.metadata
  16. behavior_metadata = {} unless behavior_metadata.is_a?(Hash)
  17. history_build_state = behavior_metadata["history_build"].is_a?(Hash) ? behavior_metadata["history_build"] : {}
  18. {
  19. profile_posts_total_count: profile_posts_total_count,
  20. deleted_posts_count: deleted_posts_count,
  21. active_posts_count: [profile_posts_total_count - deleted_posts_count, 0].max,
  22. analyzed_posts_count: analyzed_posts_count,
  23. pending_posts_count: [profile_posts_total_count - analyzed_posts_count, 0].max,
  24. messages_count: profile.instagram_messages.count,
  25. action_logs_count: profile.instagram_profile_action_logs.count,
  26. latest_analysis: profile.latest_analysis,
  27. latest_story_intelligence_event: latest_story_intelligence_event,
  28. available_tags: AVAILABLE_TAGS,
  29. history_build_state: history_build_state,
  30. history_ready: ActiveModel::Type::Boolean.new.cast(history_build_state["ready"]),
  31. mutual_profiles: MutualFriendsResolver.new(account: account, profile: profile).call(limit: mutual_limit)
  32. }
  33. end
  34. private
  35. attr_reader :account, :profile, :mutual_limit
  36. def deleted_posts_count_for(posts_scope)
  37. posts_scope
  38. .where.not(metadata: nil)
  39. .pluck(:metadata)
  40. .count { |metadata| ActiveModel::Type::Boolean.new.cast(metadata.is_a?(Hash) ? metadata["deleted_from_source"] : nil) }
  41. end
  # Returns the newest story-archive event (looking at no more than the 60
  # most recent by detected_at, then id) whose metadata passes
  # +story_intelligence_available_for_snapshot?+, or nil when none does.
  def latest_story_intelligence_event
    recent_story_events =
      profile.instagram_profile_events
        .where(kind: InstagramProfileEvent::STORY_ARCHIVE_EVENT_KINDS)
        .order(detected_at: :desc, id: :desc)
        .limit(60)

    recent_story_events.detect do |candidate|
      candidate_metadata = candidate.metadata
      candidate_metadata = {} unless candidate_metadata.is_a?(Hash)
      story_intelligence_available_for_snapshot?(metadata: candidate_metadata)
    end
  end
  # True when +metadata+ carries any story-intelligence signal: a non-empty
  # "local_story_intelligence" Hash, non-blank "ocr_text", or at least one
  # truthy element under any of the list-valued signal keys.
  def story_intelligence_available_for_snapshot?(metadata:)
    local_intelligence = metadata["local_story_intelligence"]
    return true if local_intelligence.is_a?(Hash) && local_intelligence.present?
    return true if metadata["ocr_text"].to_s.present?

    %w[content_signals object_detections ocr_blocks scenes].any? do |signal_key|
      Array(metadata[signal_key]).any?
    end
  end
  62. end
  63. end

app/services/instagram_profiles/tabulator_events_payload_builder.rb

0.0% lines covered

100.0% branches covered

59 relevant lines. 0 lines covered and 59 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module InstagramProfiles
    # Turns a page of profile events into a payload Hash with :data (one
    # serialized Hash per event), :last_page and :last_row.
    class TabulatorEventsPayloadBuilder
      def initialize(events:, total:, pages:, view_context:)
        @events = events
        @total = total
        @pages = pages
        @view_context = view_context
      end

      # @return [Hash] { data: [...], last_page: pages, last_row: total }
      def call
        rows = events.map { |event| serialize_event(event) }
        { data: rows, last_page: pages, last_row: total }
      end

      private

      attr_reader :events, :total, :pages, :view_context

      # Builds the row for a single event. Media fields are nil when the event
      # has no attached media.
      def serialize_event(event)
        metadata = event.metadata
        metadata = {} unless metadata.is_a?(Hash)
        has_media = event.media.attached?

        {
          id: event.id,
          kind: event.kind,
          external_id: event.external_id,
          occurred_at: event.occurred_at&.iso8601,
          detected_at: event.detected_at&.iso8601,
          metadata_json: metadata_preview_json(metadata),
          media_content_type: (event.media.blob.content_type if has_media),
          media_url: (blob_path(event.media) if has_media),
          media_download_url: (blob_path(event.media, disposition: "attachment") if has_media),
          media_preview_image_url: media_preview_image_url(event: event, metadata: metadata),
          video_static_frame_only: StoryArchive::MediaPreviewResolver.static_video_preview?(metadata: metadata)
        }
      end

      # Prefers the resolver's preview image; otherwise falls back to a locally
      # generated video preview representation.
      def media_preview_image_url(event:, metadata:)
        preferred = StoryArchive::MediaPreviewResolver.preferred_preview_image_url(event: event, metadata: metadata)
        preferred.present? ? preferred : local_video_preview_representation_url(event: event)
      end

      # Best effort: any error while generating the preview yields nil.
      def local_video_preview_representation_url(event:)
        return nil unless event.media.attached?
        return nil unless event.media.blob&.content_type.to_s.start_with?("video/")

        processed_preview = event.media.preview(resize_to_limit: [640, 640]).processed
        view_context.url_for(processed_preview)
      rescue StandardError
        nil
      end

      # JSON text of the metadata, cut to 1200 characters plus "..." when longer.
      def metadata_preview_json(raw_metadata)
        serialized = (raw_metadata || {}).to_json
        serialized.length > 1200 ? "#{serialized[0, 1200]}..." : serialized
      end

      # Path-only blob URL; the disposition option is passed only when given.
      def blob_path(attachment, disposition: nil)
        route_options = { only_path: true }
        route_options[:disposition] = disposition if disposition.present?
        Rails.application.routes.url_helpers.rails_blob_path(attachment, **route_options)
      end
    end
  end

app/services/instagram_profiles/tabulator_params.rb

0.0% lines covered

100.0% branches covered

59 relevant lines. 0 lines covered and 59 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module InstagramProfiles
    # Reads filter/sort/boolean options out of a request params object.
    class TabulatorParams
      TRUE_STRINGS = %w[true 1 yes].freeze
      FALSE_STRINGS = %w[false 0 no].freeze
      private_constant :TRUE_STRINGS, :FALSE_STRINGS

      def initialize(params:)
        @params = params
      end

      # Parses params[:filters] (or params[:filter]) into an Array of
      # { field:, value: } Hashes. Accepts a JSON string, an Array, or an
      # ActionController::Parameters keyed by index. Entries without a field
      # are dropped; any parsing error yields [].
      def filters
        raw = params[:filters].presence || params[:filter]
        return [] unless raw.present?

        Array(filter_entries(raw)).filter_map do |item|
          entry = entry_hash(item)
          field = entry["field"].to_s
          next if field.blank?

          { field: field, value: entry["value"] }
        end
      rescue StandardError
        []
      end

      # Parses params[:sorters] (or params[:sort]) into an Array of raw sorter
      # entries. A JSON string that does not decode to an Array, an
      # unsupported type, or any parsing error yields [].
      def sorters
        raw = params[:sorters].presence || params[:sort]
        return [] unless raw.present?

        case raw
        when String
          parsed = JSON.parse(raw)
          parsed.is_a?(Array) ? parsed : []
        when Array
          raw
        when ActionController::Parameters
          raw.to_unsafe_h.values
        else
          []
        end
      rescue StandardError
        []
      end

      # Three-state boolean: true for "true"/"1"/"yes", false for
      # "false"/"0"/"no" (case-insensitive), nil for blank or anything else.
      def parse_tri_bool(value)
        normalized = value.to_s
        return nil if normalized.blank?

        lowered = normalized.downcase
        return true if TRUE_STRINGS.include?(lowered)
        return false if FALSE_STRINGS.include?(lowered)

        nil
      end

      # Casts params[key] with ActiveModel's boolean type.
      def truthy?(key)
        ActiveModel::Type::Boolean.new.cast(params[key])
      end

      private

      attr_reader :params

      # Normalizes the raw filter parameter into a list of entries.
      def filter_entries(raw)
        case raw
        when String
          JSON.parse(raw)
        when Array
          raw
        when ActionController::Parameters
          raw.to_unsafe_h.values
        else
          []
        end
      end

      # Converts one filter entry to a string-keyed Hash. +to_unsafe_h+ is
      # preferred because +to_h+ on an unpermitted parameters object raises,
      # which previously made the whole filter list come back empty.
      def entry_hash(item)
        raw_hash =
          if item.respond_to?(:to_unsafe_h)
            item.to_unsafe_h
          elsif item.respond_to?(:to_h)
            item.to_h
          else
            {}
          end
        raw_hash.transform_keys(&:to_s)
      end
    end
  end

app/services/instagram_profiles/tabulator_profiles_payload_builder.rb

0.0% lines covered

100.0% branches covered

43 relevant lines. 0 lines covered and 43 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module InstagramProfiles
    # Turns a page of profiles into a payload Hash with :data (one serialized
    # Hash per profile), :last_page and :last_row.
    class TabulatorProfilesPayloadBuilder
      def initialize(profiles:, total:, pages:, view_context:)
        @profiles = profiles
        @total = total
        @pages = pages
        @view_context = view_context
      end

      # @return [Hash] { data: [...], last_page: pages, last_row: total }
      def call
        rows = profiles.map { |profile| serialize_profile(profile) }
        { data: rows, last_page: pages, last_row: total }
      end

      private

      attr_reader :profiles, :total, :pages, :view_context

      # Builds the row for a single profile; timestamps are ISO 8601 strings
      # or nil.
      def serialize_profile(profile)
        {
          id: profile.id,
          username: profile.username,
          display_name: profile.display_name,
          following: profile.following,
          follows_you: profile.follows_you,
          mutual: profile.mutual?,
          can_message: profile.can_message,
          restriction_reason: profile.restriction_reason,
          last_synced_at: profile.last_synced_at&.iso8601,
          last_active_at: profile.last_active_at&.iso8601,
          avatar_url: avatar_url_for(profile)
        }
      end

      # Picks the avatar source in order: attached avatar blob, the stored
      # profile picture URL, then the bundled default avatar asset.
      def avatar_url_for(profile)
        if profile.avatar.attached?
          return Rails.application.routes.url_helpers.rails_blob_path(profile.avatar, only_path: true)
        end
        return profile.profile_pic_url if profile.profile_pic_url.present?

        view_context.asset_path("default_avatar.svg")
      end
    end
  end

app/services/jobs/context_extractor.rb

0.0% lines covered

100.0% branches covered

77 relevant lines. 0 lines covered and 77 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module Jobs
    # Derives account/profile/post ids and a display label from the argument
    # payloads of queued jobs.
    class ContextExtractor
      class << self
        # Builds the context Hash from Active Job style arguments. Any error
        # while extracting falls back to the "system" context.
        def from_active_job_arguments(arguments)
          payload = normalize_arguments_payload(arguments)
          account_id = extract_int(payload, :instagram_account_id)
          profile_id = extract_int(payload, :instagram_profile_id)
          post_id = extract_int(payload, :instagram_profile_post_id)
          scope = scope_for(account_id: account_id, profile_id: profile_id)

          {
            instagram_account_id: account_id,
            instagram_profile_id: profile_id,
            instagram_profile_post_id: post_id,
            job_scope: scope,
            context_label: context_label(scope: scope, account_id: account_id, profile_id: profile_id)
          }
        rescue StandardError
          system_context
        end

        # Reads the "arguments" entry (string or symbol key) of a Hash and
        # extracts the context from it.
        def from_solid_queue_job_arguments(arguments)
          envelope = arguments.is_a?(Hash) ? arguments : {}
          from_active_job_arguments(envelope["arguments"] || envelope[:arguments])
        end

        # Uses the first element of item["args"] when it is a wrapper Hash
        # with a non-blank "arguments" entry; otherwise uses the args as-is.
        def from_sidekiq_item(item)
          args = Array((item.is_a?(Hash) ? item : {})["args"])
          wrapper = args.first
          wrapped = wrapper.is_a?(Hash) ? wrapper["arguments"] : nil

          from_active_job_arguments(wrapped.present? ? wrapped : args)
        end

        private

        # Context returned when nothing can be extracted.
        def system_context
          {
            instagram_account_id: nil,
            instagram_profile_id: nil,
            instagram_profile_post_id: nil,
            job_scope: "system",
            context_label: "System"
          }
        end

        # "profile" wins over "account"; "system" when neither id is present.
        def scope_for(account_id:, profile_id:)
          return "profile" if profile_id.present?
          return "account" if account_id.present?

          "system"
        end

        # Returns the first argument when it is a Hash; otherwise treats the
        # value itself as a Hash and descends into a nested "arguments" entry.
        def normalize_arguments_payload(arguments)
          head = Array(arguments).first
          return normalize_hash(head) if head.is_a?(Hash)

          envelope = normalize_hash(arguments)
          nested = envelope["arguments"] || envelope[:arguments]
          nested.present? ? normalize_arguments_payload(nested) : envelope
        end

        # Hash conversion that never raises; unconvertible values become {}.
        def normalize_hash(value)
          value.respond_to?(:to_h) ? value.to_h : {}
        rescue StandardError
          {}
        end

        # Looks the key up as String then Symbol and converts strictly with
        # Integer(); blank or unparseable values give nil.
        def extract_int(hash, key)
          raw = hash[key.to_s] || hash[key.to_sym]
          raw.blank? ? nil : Integer(raw)
        rescue StandardError
          nil
        end

        def context_label(scope:, account_id:, profile_id:)
          if scope == "profile"
            "Profile ##{profile_id} (Account ##{account_id || '?'})"
          elsif scope == "account"
            "Account ##{account_id}"
          else
            "System"
          end
        end
      end
    end
  end

app/services/jobs/failure_retry.rb

0.0% lines covered

100.0% branches covered

172 relevant lines. 0 lines covered and 172 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "json"
  module Jobs
    # Re-enqueues jobs recorded as failures, either one at a time (+enqueue!+)
    # or as a bounded automatic batch (+enqueue_automatic_retries!+). Retry
    # bookkeeping lives under the "retry_state" key of the failure's metadata.
    class FailureRetry
      class RetryError < StandardError; end

      DEFAULT_AUTO_LIMIT = 20
      DEFAULT_AUTO_MAX_ATTEMPTS = 3
      DEFAULT_AUTO_COOLDOWN = 10.minutes
      PIPELINE_STEP_BY_JOB_CLASS = {
        "ProcessPostVisualAnalysisJob" => "visual",
        "ProcessPostFaceAnalysisJob" => "face",
        "ProcessPostOcrAnalysisJob" => "ocr",
        "ProcessPostVideoAnalysisJob" => "video",
        "ProcessPostMetadataTaggingJob" => "metadata",
        "FinalizePostAnalysisPipelineJob" => nil
      }.freeze

      class << self
        # Re-enqueues the job behind +failure+, records the attempt in its
        # metadata and broadcasts a "jobs_changed" update.
        #
        # @return the object returned by the job class's perform_later
        # @raise [RetryError] when the failure is missing, is an auth failure,
        #   is not retryable now, names an unknown job class, or is no longer
        #   actionable
        def enqueue!(failure, source: "manual")
          raise RetryError, "Failure record is required" unless failure
          raise RetryError, "Authentication failures must not be retried" if failure.auth_failure?
          raise RetryError, "Failure is marked as non-retryable" unless failure.retryable_now?

          job_class = failure.job_class.to_s.safe_constantize
          raise RetryError, "Unknown job class: #{failure.job_class}" unless job_class

          payload = parse_arguments(failure.arguments_json)
          unless retry_actionable?(failure: failure, payload: payload)
            raise RetryError, "Failure is no longer actionable for retry"
          end

          perform_later(job_class: job_class, payload: payload).tap do |job|
            mark_retry_enqueued!(failure: failure, source: source, job: job)
            Ops::LiveUpdateBroadcaster.broadcast!(
              topic: "jobs_changed",
              account_id: failure.instagram_account_id,
              payload: { action: "retry_enqueued", failed_job_id: failure.id, new_job_id: job.job_id },
              throttle_key: "jobs_changed",
              throttle_seconds: 0
            )
          end
        end

        # Scans up to five times +limit+ recent candidates and enqueues
        # eligible ones until +limit+ jobs have been enqueued.
        #
        # @param limit [Integer] clamped to 1..200
        # @param max_attempts [Integer] clamped to 1..10
        # @param cooldown [ActiveSupport::Duration, #to_i] minimum gap since
        #   the last retry
        # @return [Hash] counts for :scanned, :enqueued, :skipped, :errors
        def enqueue_automatic_retries!(limit: DEFAULT_AUTO_LIMIT, max_attempts: DEFAULT_AUTO_MAX_ATTEMPTS, cooldown: DEFAULT_AUTO_COOLDOWN)
          cap = limit.to_i.clamp(1, 200)
          attempts_cap = max_attempts.to_i.clamp(1, 10)
          cool_down = normalize_cooldown(cooldown)
          stats = { scanned: 0, enqueued: 0, skipped: 0, errors: 0 }

          each_retry_candidate(limit: cap * 5) do |failure|
            stats[:scanned] += 1

            if eligible_for_auto_retry?(failure: failure, max_attempts: attempts_cap, cooldown: cool_down)
              begin
                enqueue!(failure, source: "auto")
                stats[:enqueued] += 1
              rescue StandardError => e
                # RetryError is a StandardError, so this covers both kinds.
                mark_retry_error!(failure: failure, error: e)
                stats[:errors] += 1
              end
              break if stats[:enqueued] >= cap
            else
              stats[:skipped] += 1
            end
          end

          Ops::StructuredLogger.info(
            event: "jobs.failure_retry.auto_batch",
            payload: stats.merge(limit: cap, max_attempts: attempts_cap, cooldown_seconds: cool_down.to_i)
          )
          stats
        end

        private

        # Decodes stored arguments JSON into an Array; a non-Array value is
        # wrapped, and blank or invalid JSON gives [].
        def parse_arguments(raw)
          return [] if raw.blank?

          decoded = JSON.parse(raw)
          decoded.is_a?(Array) ? decoded : [decoded]
        rescue StandardError
          []
        end

        # A single Hash argument is passed as keyword arguments; anything else
        # (or an ArgumentError from that attempt) is passed positionally.
        def perform_later(job_class:, payload:)
          keyword_payload = payload.length == 1 && payload.first.is_a?(Hash)
          return job_class.perform_later(**payload.first.deep_symbolize_keys) if keyword_payload

          job_class.perform_later(*payload)
        rescue ArgumentError
          job_class.perform_later(*payload)
        end

        # Yields retryable, non-authentication failures from the last 72
        # hours, newest first, up to +limit+ rows.
        def each_retry_candidate(limit:)
          candidates =
            BackgroundJobFailure
              .where(retryable: true)
              .where.not(failure_kind: "authentication")
              .where("occurred_at >= ?", 72.hours.ago)
              .order(occurred_at: :desc, id: :desc)
              .limit(limit)

          candidates.to_a.each { |failure| yield failure }
        end

        # Eligible when under the attempt cap, still actionable, and either
        # never retried or last retried at least +cooldown+ ago.
        def eligible_for_auto_retry?(failure:, max_attempts:, cooldown:)
          state = retry_state_for(failure)
          return false if state["attempts"].to_i >= max_attempts
          return false unless retry_actionable?(failure: failure)

          previous_retry_at = parse_time(state["last_retry_at"])
          return true if previous_retry_at.blank?

          previous_retry_at <= cooldown.ago
        end

        # String-keyed copy of metadata["retry_state"], or {} when absent.
        def retry_state_for(failure)
          metadata = failure.metadata.is_a?(Hash) ? failure.metadata : {}
          stored = metadata["retry_state"]
          (stored.is_a?(Hash) ? stored : {}).stringify_keys
        rescue StandardError
          {}
        end

        # Records a successful enqueue; bookkeeping errors are swallowed.
        def mark_retry_enqueued!(failure:, source:, job:)
          write_retry_state(failure) do |state|
            state["attempts"] = state["attempts"].to_i + 1
            state["last_retry_at"] = Time.current.iso8601
            state["last_retry_job_id"] = job.job_id
            state["last_retry_source"] = source.to_s
            state["last_retry_error"] = nil
          end
        rescue StandardError
          nil
        end

        # Records a failed enqueue attempt; bookkeeping errors are swallowed.
        def mark_retry_error!(failure:, error:)
          write_retry_state(failure) do |state|
            state["last_retry_error"] = "#{error.class}: #{error.message}"
            state["last_retry_attempted_at"] = Time.current.iso8601
          end
        rescue StandardError
          nil
        end

        # Yields the current retry state for mutation, then writes it back
        # into a copy of the failure's metadata with update_columns.
        def write_retry_state(failure)
          metadata = failure.metadata.is_a?(Hash) ? failure.metadata.deep_dup : {}
          state = retry_state_for(failure)
          yield state
          metadata["retry_state"] = state
          failure.update_columns(metadata: metadata, updated_at: Time.current)
        end

        def parse_time(raw)
          return nil if raw.blank?

          Time.zone.parse(raw.to_s)
        rescue StandardError
          nil
        end

        def normalize_cooldown(value)
          value.is_a?(ActiveSupport::Duration) ? value : value.to_i.seconds
        rescue StandardError
          DEFAULT_AUTO_COOLDOWN
        end

        # Non-pipeline jobs are always actionable. Pipeline jobs stop being
        # actionable when their post is gone, the pipeline run is terminal, or
        # their own step is terminal. Any lookup error counts as actionable.
        def retry_actionable?(failure:, payload: nil)
          job_name = failure.job_class.to_s
          return true unless PIPELINE_STEP_BY_JOB_CLASS.key?(job_name)

          args = pipeline_args(payload || parse_arguments(failure.arguments_json))
          return true unless args.present?

          run_id = args["pipeline_run_id"].to_s
          return true if run_id.blank?

          post = pipeline_post_from_args(args)
          return false unless post

          pipeline_state = Ai::PostAnalysisPipelineState.new(post: post)
          return false if pipeline_state.pipeline_terminal?(run_id: run_id)

          step = PIPELINE_STEP_BY_JOB_CLASS[job_name]
          return true if step.blank?

          !pipeline_state.step_terminal?(run_id: run_id, step: step)
        rescue StandardError
          true
        end

        # First payload element as a string-keyed Hash, or {}.
        def pipeline_args(payload)
          head = payload.is_a?(Array) ? payload.first : nil
          head.is_a?(Hash) ? head.stringify_keys : {}
        end

        # Finds the post named in the args, narrowed by profile and account
        # ids when those are positive.
        def pipeline_post_from_args(args)
          post_id = args["instagram_profile_post_id"].to_i
          return nil if post_id <= 0

          profile_id = args["instagram_profile_id"].to_i
          account_id = args["instagram_account_id"].to_i

          posts = InstagramProfilePost.where(id: post_id)
          posts = posts.where(instagram_profile_id: profile_id) if profile_id.positive?
          posts = posts.where(instagram_account_id: account_id) if account_id.positive?
          posts.first
        end
      end
    end
  end

app/services/messaging/integration_service.rb

0.0% lines covered

100.0% branches covered

38 relevant lines. 0 lines covered and 38 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "json"
  2. require "net/http"
  3. require "uri"
  module Messaging
    # Thin client for the official messaging HTTP API. The endpoint and bearer
    # token default to the OFFICIAL_MESSAGING_API_URL / _TOKEN environment
    # variables.
    class IntegrationService
      def initialize(api_url: ENV["OFFICIAL_MESSAGING_API_URL"], access_token: ENV["OFFICIAL_MESSAGING_API_TOKEN"])
        @api_url = api_url.to_s.strip
        @access_token = access_token.to_s
      end

      # True when both the URL and the access token are non-blank.
      def configured?
        @api_url.present? && @access_token.present?
      end

      # POSTs a text message as JSON to the configured endpoint.
      #
      # @param recipient_id [#to_s]
      # @param text [#to_s]
      # @param context [#to_h] extra data sent under the "context" key
      # @return [Hash] { ok: true, status: Integer, provider_message_id: String or nil }
      # @raise [RuntimeError] when not configured or the response is not 2xx
      def send_text!(recipient_id:, text:, context: {})
        raise "Official messaging integration is not configured" unless configured?

        uri = URI.parse(@api_url)
        request = build_request(uri, recipient_id: recipient_id, text: text, context: context)
        response = build_http(uri).request(request)
        raise "Official messaging API error: HTTP #{response.code}" unless response.is_a?(Net::HTTPSuccess)

        body = parse_response_body(response.body)
        {
          ok: true,
          status: response.code.to_i,
          provider_message_id: body["id"].to_s.presence || body["message_id"].to_s.presence
        }
      end

      private

      # Builds the authenticated JSON POST request.
      def build_request(uri, recipient_id:, text:, context:)
        request = Net::HTTP::Post.new(uri.request_uri)
        request["Content-Type"] = "application/json"
        request["Authorization"] = "Bearer #{@access_token}"
        request.body = JSON.generate(
          recipient_id: recipient_id.to_s,
          message: text.to_s,
          context: context.to_h
        )
        request
      end

      # HTTP client with TLS enabled for https URLs and bounded timeouts.
      def build_http(uri)
        http = Net::HTTP.new(uri.host, uri.port)
        http.use_ssl = (uri.scheme == "https")
        http.open_timeout = 8
        http.read_timeout = 20
        http
      end

      # Decodes the response body into a Hash. The message has already been
      # accepted by the time this runs, so an empty, malformed, or non-object
      # body (e.g. a JSON array) yields {} instead of raising.
      def parse_response_body(raw)
        decoded = JSON.parse(raw.to_s.presence || "{}")
        decoded.is_a?(Hash) ? decoded : {}
      rescue JSON::ParserError, EncodingError
        {}
      end
    end
  end

app/services/ops/account_issues.rb

0.0% lines covered

100.0% branches covered

37 relevant lines. 0 lines covered and 37 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module Ops
    # Produces a list of human-readable health issues for an account.
    class AccountIssues
      # @param account [#cookies, #login_state, #user_agent, #auth_snapshot,
      #   #cookie_authenticated?, #sessionid_cookie_present?]
      # @return [Array<Hash>] entries of the form { level: :bad/:warn, message: String }
      def self.for(account)
        issues = []
        cookie_auth_ok = account.cookie_authenticated?
        session_cookie_present = account.sessionid_cookie_present?

        if account.cookies.blank?
          issues << { level: :bad, message: "No cookies stored. Import cookies or run Manual Browser Login." }
        end

        if account.login_state.to_s != "authenticated" && !session_cookie_present
          issues << { level: :bad, message: "Login state is '#{account.login_state}'. Sync and messaging will likely fail." }
        end

        if account.user_agent.to_s.strip.blank? && !cookie_auth_ok
          issues << { level: :warn, message: "No user-agent saved. Manual login usually captures one; headless sessions can be less stable without it." }
        end

        # A missing snapshot is treated like an empty one instead of raising.
        snap = account.auth_snapshot || {}
        captured_at = snap["captured_at"].to_s

        if captured_at.present?
          begin
            captured_time = Time.iso8601(captured_at)
            if captured_time < 30.days.ago && !cookie_auth_ok
              # The message is labelled UTC, so convert before formatting;
              # otherwise a timestamp with another offset is shown mislabelled.
              stamp = captured_time.getutc.strftime("%Y-%m-%d %H:%M:%S")
              issues << { level: :warn, message: "Session bundle captured at #{stamp} UTC." }
            end
          rescue StandardError
            issues << { level: :warn, message: "Auth snapshot captured_at is not parseable." } unless cookie_auth_ok
          end
        else
          issues << { level: :warn, message: "No auth snapshot captured yet." } unless cookie_auth_ok
        end

        if snap["ig_app_id"].to_s.strip.blank? && !cookie_auth_ok
          issues << { level: :warn, message: "No ig_app_id in auth snapshot. API fetches may rely on fallback headers." }
        end

        unless session_cookie_present
          issues << { level: :bad, message: "No sessionid cookie detected. Re-authenticate this account." }
        end

        issues
      end
    end
  end

app/services/ops/audit_log_builder.rb

0.0% lines covered

100.0% branches covered

60 relevant lines. 0 lines covered and 60 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module Ops
    # Builds a combined, newest-first audit trail for an account out of its
    # profile action logs and profile events.
    class AuditLogBuilder
      class << self
        # @param instagram_account [#id, #instagram_profile_action_logs]
        # @param limit [Integer] rows to return, clamped to 1..500; the same
        #   cap is applied to each source before merging
        # @return [Array<Hash>] rows sorted by :occurred_at descending
        def for_account(instagram_account:, limit: 120)
          cap = limit.to_i.clamp(1, 500)
          rows = action_log_rows(instagram_account, cap) + event_rows(instagram_account, cap)

          # Rows without a timestamp sort as the epoch, i.e. last.
          rows.sort_by { |row| row[:occurred_at] || Time.at(0) }.reverse.first(cap)
        end

        private

        # One row per action log, newest first.
        def action_log_rows(account, cap)
          account.instagram_profile_action_logs
            .includes(:instagram_profile)
            .order(occurred_at: :desc, id: :desc)
            .limit(cap)
            .map do |log|
              metadata = log.metadata.is_a?(Hash) ? log.metadata : {}
              {
                type: "action",
                occurred_at: log.occurred_at || log.created_at,
                profile_id: log.instagram_profile&.id,
                profile_username: log.instagram_profile&.username,
                kind: log.action.to_s,
                status: log.status.to_s,
                detail: log.log_text.to_s.presence || log.error_message.to_s.presence || detail_preview(metadata),
                comment_text: comment_text_from(metadata)
              }
            end
        end

        # One row per profile event of the account, newest first, with media
        # links when media is attached.
        def event_rows(account, cap)
          InstagramProfileEvent
            .joins(:instagram_profile)
            .where(instagram_profiles: { instagram_account_id: account.id })
            .includes(:instagram_profile, media_attachment: :blob, preview_image_attachment: :blob)
            .order(detected_at: :desc, id: :desc)
            .limit(cap)
            .map do |event|
              metadata = event.metadata.is_a?(Hash) ? event.metadata : {}
              media_attached = event.media.attached?
              url_helpers = Rails.application.routes.url_helpers
              {
                type: "event",
                occurred_at: event.occurred_at || event.detected_at || event.created_at,
                profile_id: event.instagram_profile&.id,
                profile_username: event.instagram_profile&.username,
                kind: event.kind.to_s,
                status: "recorded",
                detail: detail_preview(metadata),
                comment_text: comment_text_from(metadata),
                media_attached: media_attached,
                media_url: media_attached ? url_helpers.rails_blob_path(event.media, only_path: true) : nil,
                media_download_url: media_attached ? url_helpers.rails_blob_path(event.media, disposition: "attachment", only_path: true) : nil,
                media_content_type: media_attached ? event.media.blob&.content_type.to_s : nil,
                media_preview_image_url: StoryArchive::MediaPreviewResolver.preferred_preview_image_url(event: event, metadata: metadata),
                video_static_frame_only: StoryArchive::MediaPreviewResolver.static_video_preview?(metadata: metadata)
              }
            end
        end

        # First 180 bytes of the metadata's string form. Cutting by bytes can
        # split a multibyte character, so the partial bytes are scrubbed away
        # to keep the result validly encoded.
        def detail_preview(metadata)
          metadata.to_s.byteslice(0, 180).scrub("")
        end

        # First non-blank of the comment-like metadata entries, or nil.
        def comment_text_from(metadata)
          metadata["comment_text"].to_s.presence ||
            metadata["ai_reply_text"].to_s.presence ||
            metadata["posted_comment"].to_s.presence
        end
      end
    end
  end

app/services/ops/issue_tracker.rb

0.0% lines covered

100.0% branches covered

132 relevant lines. 0 lines covered and 132 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "digest"
  module Ops
    # Creates, refreshes and resolves AppIssue rows keyed by a SHA-256
    # fingerprint of issue type, source, account, profile and a
    # distinguishing key.
    class IssueTracker
      class << self
        # Records (or bumps) an issue for a failed job. Authentication errors
        # are tracked as critical "authentication_required" issues; everything
        # else is an "error"-severity "job_failure".
        def record_job_failure!(job:, exception:, context:, failure_record:)
          auth_required = exception.is_a?(Instagram::AuthenticationRequiredError)

          upsert_issue!(
            issue_type: auth_required ? "authentication_required" : "job_failure",
            source: job.class.name,
            severity: auth_required ? "critical" : "error",
            title: issue_title_for(job: job, exception: exception),
            details: exception.message.to_s,
            instagram_account_id: context[:instagram_account_id],
            instagram_profile_id: context[:instagram_profile_id],
            background_job_failure_id: failure_record&.id,
            metadata: {
              queue_name: job.queue_name,
              active_job_id: job.job_id,
              provider_job_id: job.provider_job_id,
              error_class: exception.class.name
            },
            fingerprint: fingerprint_for_job_failure(job: job, exception: exception, context: context)
          )
        end

        # Resolves the AI-service issue when +ok+, otherwise records it.
        def record_ai_service_check!(ok:, message:, metadata: {})
          fingerprint = fingerprint_for("ai_service_health", "AiDashboardController", nil, nil, "ai_microservice_offline")

          if ok
            resolve_by_fingerprint!(fingerprint: fingerprint, notes: "AI microservice healthy again.")
            return
          end

          upsert_issue!(
            issue_type: "ai_service_unavailable",
            source: "AiDashboardController",
            severity: "critical",
            title: "AI microservice unavailable",
            details: message.to_s,
            metadata: metadata,
            fingerprint: fingerprint
          )
        end

        # Resolves the queue-health issue when +ok+, otherwise records it.
        def record_queue_health!(ok:, message:, metadata: {})
          fingerprint = fingerprint_for("queue_health", "Sidekiq", nil, nil, "workers_or_backlog")

          if ok
            resolve_by_fingerprint!(fingerprint: fingerprint, notes: "Queue health recovered.")
            return
          end

          upsert_issue!(
            issue_type: "queue_health_degraded",
            source: "Sidekiq",
            severity: "critical",
            title: "Queue processing degraded",
            details: message.to_s,
            metadata: metadata,
            fingerprint: fingerprint
          )
        end

        # Marks the matching issue resolved unless it is missing or already
        # resolved. Errors are logged as warnings, not raised.
        def resolve_by_fingerprint!(fingerprint:, notes: nil)
          issue = AppIssue.find_by(fingerprint: fingerprint.to_s)
          return if issue.nil? || issue.status == "resolved"

          issue.mark_resolved!(notes: notes)
        rescue StandardError => e
          Rails.logger.warn("[ops.issue_tracker] resolve failed: #{e.class}: #{e.message}")
        end

        # Finds or initializes the issue for +fingerprint+, refreshes its
        # attributes, bumps the occurrence count and reopens it.
        #
        # @return [AppIssue, nil] the saved issue, or nil when saving failed
        #   (the error is logged as a warning)
        def upsert_issue!(issue_type:, source:, severity:, title:, details:, metadata: {}, fingerprint:, instagram_account_id: nil, instagram_profile_id: nil, background_job_failure_id: nil)
          seen_at = Time.current
          issue = AppIssue.find_or_initialize_by(fingerprint: fingerprint.to_s)

          issue.issue_type = issue_type.to_s
          issue.source = source.to_s
          issue.severity = normalize_severity(severity)
          issue.title = title.to_s
          issue.details = details.to_s

          issue.instagram_account_id = instagram_account_id
          issue.instagram_profile_id = instagram_profile_id
          issue.background_job_failure_id = background_job_failure_id

          # New metadata is merged over whatever the issue already stores.
          issue.metadata = (issue.metadata || {}).merge(metadata.to_h)

          issue.first_seen_at ||= seen_at
          issue.last_seen_at = seen_at
          issue.occurrences = issue.new_record? ? 1 : issue.occurrences.to_i + 1

          issue.status = "open"
          issue.resolved_at = nil
          issue.save!
          issue
        rescue StandardError => e
          Rails.logger.warn("[ops.issue_tracker] upsert failed: #{e.class}: #{e.message}")
          nil
        end

        private

        def issue_title_for(job:, exception:)
          prefix = exception.is_a?(Instagram::AuthenticationRequiredError) ? "Authentication required for" : "Job failure in"
          "#{prefix} #{job.class.name}"
        end

        # Unknown severities fall back to "error".
        def normalize_severity(value)
          candidate = value.to_s
          AppIssue::SEVERITIES.include?(candidate) ? candidate : "error"
        end

        # Authentication failures share one fingerprint per job/account/
        # profile; other failures are keyed by their normalized message.
        def fingerprint_for_job_failure(job:, exception:, context:)
          key = "authentication_required"
          key = normalized_error_message(exception.message.to_s) unless exception.is_a?(Instagram::AuthenticationRequiredError)

          fingerprint_for("job_failure", job.class.name, context[:instagram_account_id], context[:instagram_profile_id], key)
        end

        def fingerprint_for(issue_type, source, account_id, profile_id, key)
          parts = [issue_type, source, account_id, profile_id, key].map(&:to_s)
          Digest::SHA256.hexdigest(parts.join("|"))
        end

        # Replaces numbers of two or more digits and hex runs of eight or
        # more characters with placeholders, then truncates to 180 characters.
        def normalized_error_message(msg)
          without_numbers = msg.to_s.gsub(/\b\d{2,}\b/, "<n>")
          without_numbers.gsub(/[0-9a-f]{8,}/i, "<hex>").truncate(180)
        end
      end
    end
  end

app/services/ops/live_update_broadcaster.rb

0.0% lines covered

100.0% branches covered

46 relevant lines. 0 lines covered and 46 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module Ops
    # Broadcasts operational updates over Action Cable to a global stream
    # and/or a per-account stream, with optional cache-based throttling.
    class LiveUpdateBroadcaster
      THROTTLE_CACHE_PREFIX = "ops:live_update:throttle".freeze

      class << self
        def global_stream
          "operations:global"
        end

        def account_stream(account_id)
          "operations:account:#{account_id}"
        end

        # Sends { topic:, sent_at:, payload: } to the relevant streams.
        #
        # When +include_global+ is nil the global stream is used only if no
        # positive account id is given; otherwise it is cast to a boolean.
        # The account stream is used whenever the account id is positive.
        # Throttled calls return without broadcasting. Errors are logged as
        # warnings, not raised.
        def broadcast!(topic:, account_id: nil, payload: {}, throttle_key: nil, throttle_seconds: 0.8, include_global: nil)
          return if throttled?(topic: topic, account_id: account_id, throttle_key: throttle_key, throttle_seconds: throttle_seconds)

          target_account_id = account_id.to_i
          to_global =
            if include_global.nil?
              target_account_id <= 0
            else
              ActiveModel::Type::Boolean.new.cast(include_global)
            end

          message = base_message(topic: topic, payload: payload)
          ActionCable.server.broadcast(global_stream, message) if to_global
          ActionCable.server.broadcast(account_stream(target_account_id), message) if target_account_id.positive?
        rescue StandardError => e
          Rails.logger.warn("[ops.live_update] broadcast failed: #{e.class}: #{e.message}")
        end

        private

        # Non-Hash payloads are replaced with an empty Hash.
        def base_message(topic:, payload:)
          safe_payload = payload.is_a?(Hash) ? payload : {}
          { topic: topic.to_s, sent_at: Time.current.iso8601(3), payload: safe_payload }
        end

        # True when a marker for this key is already cached; otherwise writes
        # the marker with the given lifetime. A non-positive lifetime disables
        # throttling, and cache errors count as "not throttled".
        def throttled?(topic:, account_id:, throttle_key:, throttle_seconds:)
          ttl = throttle_seconds.to_f
          return false if ttl <= 0

          marker_key = cache_key(topic: topic, account_id: account_id, throttle_key: throttle_key)
          return true if Rails.cache.read(marker_key)

          Rails.cache.write(marker_key, true, expires_in: ttl.seconds)
          false
        rescue StandardError
          false
        end

        def cache_key(topic:, account_id:, throttle_key:)
          suffix = throttle_key.presence || topic.to_s
          [THROTTLE_CACHE_PREFIX, account_id.to_i, suffix].join(":")
        end
      end
    end
  end

app/services/ops/local_ai_health.rb

0.0% lines covered

100.0% branches covered

110 relevant lines. 0 lines covered and 110 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module Ops
    # Cached health status for the local AI stack (microservice + Ollama).
    # Cache lifetimes and the staleness threshold come from environment
    # variables, in seconds.
    class LocalAiHealth
      CACHE_KEY = "ops:local_ai_health:v1".freeze
      CACHE_TTL = ENV.fetch("AI_HEALTH_CACHE_TTL_SECONDS", "900").to_i.seconds
      FAILURE_CACHE_TTL = ENV.fetch("AI_HEALTH_FAILURE_CACHE_TTL_SECONDS", "60").to_i.seconds
      STALE_AFTER = ENV.fetch("AI_HEALTH_STALE_AFTER_SECONDS", "240").to_i.seconds

      class << self
        # Returns the cached status annotated with staleness, or a
        # "missing_cache" placeholder. Never runs a live check.
        def status
          cached = Rails.cache.read(CACHE_KEY)
          cached.present? ? annotate_status(cached, source: "cache") : missing_status
        end

        # Returns the cached status when one exists, unless +force+ is set or
        # +refresh_if_stale+ is set and the cached entry is stale; in those
        # cases (and when nothing is cached) a live check is performed.
        def check(force: false, refresh_if_stale: false)
          cached = Rails.cache.read(CACHE_KEY)

          if cached.present? && !force
            from_cache = annotate_status(cached, source: "cache")
            needs_refresh = refresh_if_stale && from_cache[:stale]
            return from_cache unless needs_refresh
          end

          perform_live_check
        end

        private

        # Probes both components, caches the outcome and records usage
        # metrics. An exception is cached as a failure with the shorter
        # failure lifetime.
        def perform_live_check
          started_at = monotonic_started_at
          checked_at = Time.current
          microservice = Ai::LocalMicroserviceClient.new.test_connection!
          ollama = Ai::OllamaClient.new.test_connection!
          boolean_type = ActiveModel::Type::Boolean.new

          result = {
            ok: boolean_type.cast(microservice[:ok]) && boolean_type.cast(ollama[:ok]),
            checked_at: checked_at.iso8601(3),
            details: { microservice: microservice, ollama: ollama }
          }

          Rails.cache.write(CACHE_KEY, result, expires_in: CACHE_TTL)
          track_healthcheck_metrics(result: result, started_at: started_at)
          annotate_status(result, source: "live")
        rescue StandardError => e
          failure = {
            ok: false,
            checked_at: Time.current.iso8601(3),
            error: e.message.to_s,
            error_class: e.class.name
          }
          Rails.cache.write(CACHE_KEY, failure, expires_in: FAILURE_CACHE_TTL)
          Ai::ApiUsageTracker.track_failure(
            **tracking_identity,
            started_at: started_at,
            error: "#{e.class}: #{e.message}",
            metadata: failure
          )
          annotate_status(failure, source: "live")
        end

        # Reports the check to the usage tracker as a success or a failure
        # depending on the combined result.
        def track_healthcheck_metrics(result:, started_at:)
          unless ActiveModel::Type::Boolean.new.cast(result[:ok])
            Ai::ApiUsageTracker.track_failure(
              **tracking_identity,
              started_at: started_at,
              error: "One or more local AI components are unavailable",
              metadata: result[:details]
            )
            return
          end

          Ai::ApiUsageTracker.track_success(
            **tracking_identity,
            started_at: started_at,
            metadata: result[:details]
          )
        end

        # Keyword arguments shared by every usage-tracker call made here.
        def tracking_identity
          { provider: "local_ai_stack", operation: "health_check", category: "healthcheck" }
        end

        # Symbolizes the payload and adds :stale and :source. A missing or
        # unparseable :checked_at counts as stale.
        def annotate_status(payload, source:)
          row = payload.is_a?(Hash) ? payload.deep_symbolize_keys : {}
          checked_at_text = row[:checked_at].to_s
          checked_at_time = parse_timestamp(checked_at_text)
          is_stale = checked_at_time.nil? || checked_at_time < STALE_AFTER.ago

          row.merge(checked_at: checked_at_text.presence, stale: is_stale, source: source.to_s)
        end

        def parse_timestamp(value)
          text = value.to_s.strip
          text.blank? ? nil : Time.iso8601(text)
        rescue StandardError
          nil
        end

        def missing_status
          {
            ok: false,
            checked_at: nil,
            stale: true,
            source: "missing_cache",
            error: "No cached AI health status is available yet."
          }
        end

        # Monotonic clock reading, falling back to wall-clock seconds if the
        # clock call raises.
        def monotonic_started_at
          Process.clock_gettime(Process::CLOCK_MONOTONIC)
        rescue StandardError
          Time.current.to_f
        end
      end
    end
  end

app/services/ops/local_story_intelligence_backfill.rb

0.0% lines covered

100.0% branches covered

160 relevant lines. 0 lines covered and 160 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "timeout"
  module Ops
    # Batch maintenance over story-archive profile events: recomputes local
    # story intelligence and optionally enqueues comment generation jobs.
    class LocalStoryIntelligenceBackfill
      DEFAULT_LIMIT = 100
      EVENT_TIMEOUT_SECONDS = 45

      # @param account_id [#to_s, nil] restrict the scan to one Instagram account
      # @param limit [#to_i, nil] maximum events to scan; non-positive values
      #   fall back to DEFAULT_LIMIT
      # @param enqueue_comments [Object] boolean-ish flag, cast with ActiveModel
      def initialize(account_id: nil, limit: nil, enqueue_comments: false)
        requested_limit = limit.to_i
        @account_id = account_id.to_s.presence
        @limit = requested_limit.positive? ? requested_limit : DEFAULT_LIMIT
        @enqueue_comments = ActiveModel::Type::Boolean.new.cast(enqueue_comments)
      end

      # Recomputes and persists story intelligence for events with media.
      #
      # @return [Hash] counters (:scanned, :enriched, :empty, :queued, :errors)
      #   plus :reasons, a reason => count hash ordered by descending count
      def backfill!
        tally = { scanned: 0, enriched: 0, empty: 0, queued: 0, errors: 0, reasons: Hash.new(0) }

        story_event_scope.each do |event|
          break if tally[:scanned] >= @limit
          next unless event.media.attached?

          tally[:scanned] += 1
          intelligence = with_event_timeout(event: event) { event.send(:local_story_intelligence_payload) }
          # A timeout (nil) or any non-hash payload is skipped without counting.
          next unless intelligence.is_a?(Hash)

          if event.send(:local_story_intelligence_blank?, intelligence)
            tally[:empty] += 1
            tally[:reasons][intelligence[:reason].to_s.presence || "local_story_intelligence_blank"] += 1
          else
            event.send(:persist_local_story_intelligence!, intelligence)
            tally[:enriched] += 1
            wants_comment = @enqueue_comments && regeneration_candidate?(event)
            if wants_comment && enqueue_comment_job(event, requested_by: "local_story_intelligence_backfill")
              tally[:queued] += 1
            end
          end
        rescue StandardError => e
          tally[:errors] += 1
          warn_event_failure("story_intelligence.backfill.error", event, e)
        end

        tally[:reasons] = tally[:reasons].sort_by { |_reason, count| -count }.to_h
        log_batch(event: "story_intelligence.backfill.completed", result: tally)
        tally
      end

      # Re-enqueues comment generation for events that still need a comment.
      #
      # @return [Hash] counters (:scanned, :queued, :skipped_no_context,
      #   :skipped_in_progress, :skipped_not_needed, :errors)
      def requeue_generation!
        tally = {
          scanned: 0,
          queued: 0,
          skipped_no_context: 0,
          skipped_in_progress: 0,
          skipped_not_needed: 0,
          errors: 0
        }

        story_event_scope.each do |event|
          break if tally[:scanned] >= @limit
          next unless event.media.attached?

          tally[:scanned] += 1
          if event.llm_comment_in_progress?
            tally[:skipped_in_progress] += 1
          elsif !regeneration_candidate?(event)
            tally[:skipped_not_needed] += 1
          else
            intelligence = with_event_timeout(event: event) { event.send(:local_story_intelligence_payload) }
            next unless intelligence.is_a?(Hash)

            if event.send(:local_story_intelligence_blank?, intelligence)
              tally[:skipped_no_context] += 1
            else
              event.send(:persist_local_story_intelligence!, intelligence)
              queued = enqueue_comment_job(event, requested_by: "local_story_intelligence_requeue")
              tally[:queued] += 1 if queued
            end
          end
        rescue StandardError => e
          tally[:errors] += 1
          warn_event_failure("story_intelligence.requeue.error", event, e)
        end

        log_batch(event: "story_intelligence.requeue.completed", result: tally)
        tally
      end

      private

      # Story-archive events, newest first, optionally limited to one account.
      def story_event_scope
        events = InstagramProfileEvent
          .where(kind: InstagramProfileEvent::STORY_ARCHIVE_EVENT_KINDS)
          .includes(:instagram_profile)
          .order(detected_at: :desc, id: :desc)
        return events unless @account_id.present?

        events.joins(:instagram_profile).where(instagram_profiles: { instagram_account_id: @account_id })
      end

      # True when the event has no usable comment: generation failed, the
      # comment is blank, it came from the fallback source, or it was produced
      # by a pipeline other than local_story_intelligence_v2.
      def regeneration_candidate?(event)
        stored = event.llm_comment_metadata
        details = stored.is_a?(Hash) ? stored : {}
        origin = details["source"].to_s
        pipeline_name = details["pipeline"].to_s

        event.llm_comment_status.to_s == "failed" ||
          event.llm_generated_comment.to_s.blank? ||
          origin == "fallback" ||
          (pipeline_name.present? && pipeline_name != "local_story_intelligence_v2")
      end

      # Enqueues GenerateLlmCommentJob and marks the event as queued.
      #
      # @return [Boolean] false (after logging) when anything raises
      def enqueue_comment_job(event, requested_by:)
        enqueued = GenerateLlmCommentJob.perform_later(
          instagram_profile_event_id: event.id,
          provider: "local",
          requested_by: requested_by
        )
        event.queue_llm_comment_generation!(job_id: enqueued.job_id)
        true
      rescue StandardError => e
        Ops::StructuredLogger.warn(
          event: "story_intelligence.comment_enqueue.error",
          payload: {
            event_id: event.id,
            requested_by: requested_by,
            error_class: e.class.name,
            error_message: e.message
          }
        )
        false
      end

      # Logs a per-event failure under the given structured-log event name.
      def warn_event_failure(log_event, event, error)
        Ops::StructuredLogger.warn(
          event: log_event,
          payload: {
            event_id: event&.id,
            error_class: error.class.name,
            error_message: error.message
          }
        )
      end

      # Logs the batch summary; :reasons is always emitted last.
      def log_batch(event:, result:)
        fields = { account_id: @account_id, limit: @limit }
        fields.merge!(result.except(:reasons))
        fields[:reasons] = result[:reasons]
        Ops::StructuredLogger.info(event: event, payload: fields)
      end

      # Runs the block under EVENT_TIMEOUT_SECONDS; logs and returns nil on
      # timeout.
      def with_event_timeout(event:)
        Timeout.timeout(EVENT_TIMEOUT_SECONDS) { yield }
      rescue Timeout::Error
        Ops::StructuredLogger.warn(
          event: "story_intelligence.event_timeout",
          payload: {
            event_id: event&.id,
            timeout_seconds: EVENT_TIMEOUT_SECONDS
          }
        )
        nil
      end
    end
  end

app/services/ops/metrics.rb

0.0% lines covered

100.0% branches covered

164 relevant lines. 0 lines covered and 164 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module Ops
    # Read-only counters for operations dashboards: application record counts,
    # background-queue sizes, AI API usage and visual-analysis failures.
    class Metrics
      API_USAGE_WINDOW = 24.hours
      VISUAL_ANALYSIS_JOB_CLASS = "ProcessPostVisualAnalysisJob".freeze

      # Application-wide metrics.
      #
      # @return [Hash] with :queue, :app, :api_usage_24h and :visual_failures_24h
      def self.system
        since = 24.hours.ago
        usage_scope = AiApiCall.where(occurred_at: API_USAGE_WINDOW.ago..Time.current)
        # Built once and reused so every 24h failure figure shares one cutoff.
        recent_failures = BackgroundJobFailure.where("occurred_at >= ?", since)
        visual_failures = recent_failures.where(job_class: VISUAL_ANALYSIS_JOB_CLASS)

        {
          queue: queue_counts,
          app: {
            accounts: InstagramAccount.count,
            continuous_processing_enabled_accounts: InstagramAccount.where(continuous_processing_enabled: true).count,
            continuous_processing_running_accounts: InstagramAccount.where(continuous_processing_state: "running").count,
            continuous_processing_backoff_accounts: InstagramAccount.where("continuous_processing_retry_after_at > ?", Time.current).count,
            profiles: InstagramProfile.count,
            messages: InstagramMessage.count,
            profile_events: InstagramProfileEvent.count,
            ai_analyses: AiAnalysis.count,
            ai_api_calls: AiApiCall.count,
            posts: InstagramPost.count,
            sync_runs: SyncRun.count,
            failures_24h: recent_failures.count,
            visual_analysis_failures_24h: visual_failures.count,
            auth_failures_24h: recent_failures.where(failure_kind: "authentication").count,
            active_issues: AppIssue.where.not(status: "resolved").count,
            storage_ingestions_24h: ActiveStorageIngestion.where("created_at >= ?", since).count,
            continuous_processing_runs_24h: SyncRun.where(kind: "continuous_processing").where("created_at >= ?", since).count
          },
          api_usage_24h: api_usage_summary(scope: usage_scope),
          visual_failures_24h: visual_failure_summary(scope: visual_failures)
        }
      end

      # Metrics scoped to a single Instagram account.
      #
      # @param account [InstagramAccount]
      # @return [Hash] with :app, :sync_runs_by_status, :analyses_by_status,
      #   :api_usage_24h, :visual_failures_24h and :queue
      def self.for_account(account)
        since = 24.hours.ago
        usage_scope = AiApiCall.where(instagram_account_id: account.id, occurred_at: API_USAGE_WINDOW.ago..Time.current)
        recent_failures = BackgroundJobFailure.where(instagram_account_id: account.id).where("occurred_at >= ?", since)
        visual_failures = recent_failures.where(job_class: VISUAL_ANALYSIS_JOB_CLASS)

        {
          app: {
            profiles: account.instagram_profiles.count,
            mutuals: account.instagram_profiles.where(following: true, follows_you: true).count,
            following: account.instagram_profiles.where(following: true).count,
            followers: account.instagram_profiles.where(follows_you: true).count,
            messages: account.instagram_messages.count,
            profile_events: InstagramProfileEvent.joins(:instagram_profile).where(instagram_profiles: { instagram_account_id: account.id }).count,
            ai_analyses: account.ai_analyses.count,
            ai_api_calls: account.ai_api_calls.count,
            posts: account.instagram_posts.count,
            sync_runs: account.sync_runs.count,
            failures_24h: recent_failures.count,
            visual_analysis_failures_24h: visual_failures.count,
            auth_failures_24h: recent_failures.where(failure_kind: "authentication").count,
            active_issues: account.app_issues.where.not(status: "resolved").count,
            storage_ingestions_24h: account.active_storage_ingestions.where("created_at >= ?", since).count,
            continuous_processing_state: account.continuous_processing_state,
            continuous_processing_failure_count: account.continuous_processing_failure_count.to_i,
            continuous_processing_backoff_active: account.continuous_processing_backoff_active?,
            continuous_processing_runs_24h: account.sync_runs.where(kind: "continuous_processing").where("created_at >= ?", since).count
          },
          sync_runs_by_status: account.sync_runs.group(:status).count,
          analyses_by_status: account.ai_analyses.group(:status).count,
          api_usage_24h: api_usage_summary(scope: usage_scope),
          visual_failures_24h: visual_failure_summary(scope: visual_failures),
          queue: queue_counts
        }
      end

      # Queue sizes for whichever Active Job backend is configured.
      def self.queue_counts
        sidekiq_backend? ? sidekiq_counts : solid_queue_counts
      end

      # Sidekiq queue, set and process sizes. On any error the failure is
      # logged and an all-zero hash of the same shape is returned.
      def self.sidekiq_counts
        require "sidekiq/api"
        queue_rows = Sidekiq::Queue.all.map { |queue| { name: queue.name, size: queue.size } }
        {
          backend: "sidekiq",
          enqueued: queue_rows.sum { |row| row[:size].to_i },
          scheduled: Sidekiq::ScheduledSet.new.size,
          retries: Sidekiq::RetrySet.new.size,
          dead: Sidekiq::DeadSet.new.size,
          processes: Sidekiq::ProcessSet.new.size,
          queues: queue_rows
        }
      rescue StandardError => e
        # Zeros here are indistinguishable from an idle queue, so leave a trace.
        Ops::StructuredLogger.warn(
          event: "metrics.sidekiq_counts.failed",
          payload: { error_class: e.class.name, error_message: e.message }
        )
        {
          backend: "sidekiq",
          enqueued: 0,
          scheduled: 0,
          retries: 0,
          dead: 0,
          processes: 0,
          queues: []
        }
      end

      # Solid Queue execution-table counts; each count falls back to 0 on error.
      def self.solid_queue_counts
        {
          backend: "solid_queue",
          ready: safe_count { SolidQueue::ReadyExecution.count },
          scheduled: safe_count { SolidQueue::ScheduledExecution.count },
          claimed: safe_count { SolidQueue::ClaimedExecution.count },
          blocked: safe_count { SolidQueue::BlockedExecution.count },
          failed: safe_count { SolidQueue::FailedExecution.count },
          processes: safe_count { SolidQueue::Process.count }
        }
      end

      # True when the configured Active Job queue adapter is "sidekiq".
      def self.sidekiq_backend?
        Rails.application.config.active_job.queue_adapter.to_s == "sidekiq"
      rescue StandardError
        false
      end

      # Yields and returns the block's value, or 0 when it raises.
      def self.safe_count
        yield
      rescue StandardError
        0
      end

      # Aggregates AI API calls in the given scope.
      #
      # @param scope [ActiveRecord::Relation] AiApiCall rows to summarise
      # @return [Hash] totals, per-category/provider/status counts and the ten
      #   most frequent operations
      def self.api_usage_summary(scope:)
        by_category = scope.group(:category).count.transform_keys(&:to_s)
        by_provider = scope.group(:provider).count.transform_keys(&:to_s)
        by_status = scope.group(:status).count.transform_keys(&:to_s)
        by_operation =
          scope.group(:operation).count.transform_keys(&:to_s)
            .sort_by { |_operation, count| -count.to_i }
            .first(10)
            .to_h

        {
          total_calls: scope.count,
          failed_calls: by_status["failed"].to_i,
          image_analysis_calls: by_category["image_analysis"].to_i,
          image_analysis_failures: scope.where(category: "image_analysis", status: "failed").count,
          report_generation_calls: by_category["report_generation"].to_i,
          text_generation_calls: by_category["text_generation"].to_i,
          total_tokens: scope.sum(:total_tokens).to_i,
          avg_latency_ms: scope.where.not(latency_ms: nil).average(:latency_ms)&.round(1),
          by_category: by_category,
          by_provider: by_provider,
          by_status: by_status,
          top_operations: by_operation
        }
      end

      # Total failures plus the five most frequent (error_class, error_message)
      # pairs in the scope; messages are truncated to 180 bytes. On any error
      # the failure is logged and an empty summary is returned.
      def self.visual_failure_summary(scope:)
        top_errors =
          scope.group(:error_class, :error_message)
            .count
            .sort_by { |_row, count| -count.to_i }
            .first(5)
            .map do |(error_class, error_message), count|
              {
                error_class: error_class.to_s,
                error_message: error_message.to_s.byteslice(0, 180),
                count: count.to_i
              }
            end

        {
          total_failures: scope.count,
          by_error: top_errors
        }
      rescue StandardError => e
        Ops::StructuredLogger.warn(
          event: "metrics.visual_failure_summary.failed",
          payload: { error_class: e.class.name, error_message: e.message }
        )
        {
          total_failures: 0,
          by_error: []
        }
      end
    end
  end

app/services/ops/queue_health.rb

0.0% lines covered

100.0% branches covered

43 relevant lines. 0 lines covered and 43 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module Ops
    # Detects a Sidekiq deployment whose jobs are piling up with no worker
    # process alive, and records the verdict with Ops::IssueTracker.
    class QueueHealth
      # Minimum number of pending jobs that counts as a backlog.
      STUCK_BACKLOG_THRESHOLD = 1

      # Evaluates queue health from Ops::Metrics.queue_counts.
      #
      # Non-Sidekiq backends are reported healthy without further checks.
      #
      # @return [Hash] always has :ok; failures add :reason, and errors raised
      #   during the check add :error_class and :error_message
      def self.check!
        counts = Ops::Metrics.queue_counts
        backend = counts[:backend].to_s
        return { ok: true, backend: backend } if backend != "sidekiq"

        pending_jobs = %i[enqueued scheduled retries].sum { |key| counts[key].to_i }
        workerless = counts[:processes].to_i.zero?
        return report_stuck_backlog(counts) if workerless && pending_jobs >= STUCK_BACKLOG_THRESHOLD

        # Dead jobs are worth a warning but do not fail the check.
        if counts[:dead].to_i.positive?
          Ops::StructuredLogger.warn(event: "queue.health.dead_jobs_present", payload: counts)
        end

        Ops::IssueTracker.record_queue_health!(ok: true, message: "Sidekiq queue healthy.", metadata: counts)
        { ok: true, counts: counts }
      rescue StandardError => e
        failure = { error_class: e.class.name, error_message: e.message }
        Ops::StructuredLogger.error(event: "queue.health.check_failed", payload: failure)
        { ok: false, reason: "check_failed" }.merge(failure)
      end

      # Records and logs the "backlog but no workers" condition.
      def self.report_stuck_backlog(counts)
        message = "No Sidekiq workers detected while queue backlog is present."
        Ops::IssueTracker.record_queue_health!(ok: false, message: message, metadata: counts)
        Ops::StructuredLogger.error(event: "queue.health.failed", payload: counts.merge(message: message))
        { ok: false, reason: "no_workers_with_backlog", counts: counts }
      end
      private_class_method :report_stuck_backlog
    end
  end

app/services/ops/resource_guard.rb

0.0% lines covered

100.0% branches covered

111 relevant lines. 0 lines covered and 111 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "etc"
  module Ops
    # Decides whether an AI task may run now, based on queue depth, CPU load
    # per core and available memory read from /proc.
    #
    # The guard fails open: when a measurement cannot be taken it is not
    # treated as resource pressure, and any unexpected error allows the task.
    class ResourceGuard
      DEFAULT_MAX_LOAD_PER_CORE = ENV.fetch("AI_MAX_LOAD_PER_CORE", "1.20").to_f
      DEFAULT_MIN_AVAILABLE_MEMORY_MB = ENV.fetch("AI_MIN_AVAILABLE_MEMORY_MB", "700").to_i
      DEFAULT_MAX_QUEUE_DEPTH = ENV.fetch("AI_MAX_QUEUE_DEPTH", "220").to_i
      DEFAULT_RETRY_SECONDS = ENV.fetch("AI_RESOURCE_RETRY_SECONDS", "20").to_i

      class << self
        # Returns the admission decision for one AI task.
        #
        # @param task [#to_s] task label echoed back in the result
        # @param queue_name [#to_s, nil] Sidekiq queue whose depth is measured
        # @param critical [Object] boolean-ish; critical tasks are always allowed
        # @return [Hash] :allow, :reason, :retry_in_seconds, :snapshot, :task
        def allow_ai_task?(task:, queue_name:, critical: false)
          current = snapshot(queue_name: queue_name)

          if !overloaded?(snapshot: current) || ActiveModel::Type::Boolean.new.cast(critical)
            return {
              allow: true,
              reason: nil,
              retry_in_seconds: nil,
              snapshot: current,
              task: task.to_s
            }
          end

          {
            allow: false,
            reason: reason_for(snapshot: current),
            retry_in_seconds: retry_seconds_for(snapshot: current),
            snapshot: current,
            task: task.to_s
          }
        rescue StandardError => e
          {
            allow: true,
            reason: "resource_guard_error:#{e.class}",
            retry_in_seconds: nil,
            snapshot: { error: e.message.to_s },
            task: task.to_s
          }
        end

        # Current resource measurements.
        #
        # @return [Hash] :available_memory_mb is nil when it cannot be measured
        def snapshot(queue_name: nil)
          # Read the load average once so both load figures agree.
          load_1m = load_average_1m
          cores = cpu_cores
          {
            queue_name: queue_name.to_s,
            queue_depth: queue_depth_for(queue_name: queue_name),
            load_average_1m: load_1m,
            load_per_core: load_per_core(load_average: load_1m, cores: cores),
            cpu_cores: cores,
            available_memory_mb: available_memory_mb,
            checked_at: Time.current.iso8601(3)
          }
        end

        private

        def overloaded?(snapshot:)
          snapshot[:load_per_core].to_f > DEFAULT_MAX_LOAD_PER_CORE ||
            low_memory?(snapshot) ||
            snapshot[:queue_depth].to_i > DEFAULT_MAX_QUEUE_DEPTH
        end

        # An unknown (nil) reading is not low memory; otherwise hosts without
        # /proc/meminfo would block every non-critical task.
        def low_memory?(snapshot)
          available = snapshot[:available_memory_mb]
          !available.nil? && available.to_i < DEFAULT_MIN_AVAILABLE_MEMORY_MB
        end

        # First matching pressure reason; queue depth takes priority over CPU,
        # CPU over memory.
        def reason_for(snapshot:)
          return "high_queue_depth" if snapshot[:queue_depth].to_i > DEFAULT_MAX_QUEUE_DEPTH
          return "high_cpu_load" if snapshot[:load_per_core].to_f > DEFAULT_MAX_LOAD_PER_CORE
          return "low_available_memory" if low_memory?(snapshot)

          "resource_pressure"
        end

        # Suggested retry delay: the base for queue depth, +10s for CPU load,
        # +20s for low memory.
        def retry_seconds_for(snapshot:)
          case reason_for(snapshot: snapshot)
          when "high_cpu_load"
            DEFAULT_RETRY_SECONDS + 10
          when "low_available_memory"
            DEFAULT_RETRY_SECONDS + 20
          else
            DEFAULT_RETRY_SECONDS
          end
        end

        # Size of the named Sidekiq queue; 0 for a blank name, a non-Sidekiq
        # backend, or any error.
        def queue_depth_for(queue_name:)
          return 0 if queue_name.to_s.blank?
          return 0 unless sidekiq_backend?

          require "sidekiq/api"
          Sidekiq::Queue.new(queue_name.to_s).size
        rescue StandardError
          0
        end

        def sidekiq_backend?
          Rails.application.config.active_job.queue_adapter.to_s == "sidekiq"
        rescue StandardError
          false
        end

        # 1-minute load average from /proc/loadavg; 0.0 when unreadable.
        def load_average_1m
          File.read("/proc/loadavg").to_s.split.first.to_f
        rescue StandardError
          0.0
        end

        # Processor count reported by Etc, never less than 1.
        def cpu_cores
          value = Etc.nprocessors
          value.to_i.positive? ? value.to_i : 1
        rescue StandardError
          1
        end

        def load_per_core(load_average: load_average_1m, cores: cpu_cores)
          load_average.to_f / cores.to_f
        rescue StandardError
          load_average.to_f
        end

        # MemAvailable from /proc/meminfo in MB, or nil when the file or the
        # field is missing.
        def available_memory_mb
          line = File.readlines("/proc/meminfo").find { |row| row.start_with?("MemAvailable:") }
          return nil unless line

          kb = line.split[1].to_i
          (kb / 1024.0).round
        rescue StandardError
          nil
        end
      end
    end
  end

app/services/ops/structured_logger.rb

0.0% lines covered

100.0% branches covered

34 relevant lines. 0 lines covered and 34 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "json"
  module Ops
    # Writes single-line JSON log entries to Rails.logger.
    #
    # Each entry carries the envelope fields ts, event and pid followed by the
    # non-nil payload fields. Logging is best-effort: .write never raises.
    class StructuredLogger
      # Envelope field names the payload is not allowed to overwrite.
      RESERVED_KEYS = %w[ts event pid].freeze

      class << self
        def info(event:, payload: {})
          write(level: :info, event: event, payload: payload)
        end

        def warn(event:, payload: {})
          write(level: :warn, event: event, payload: payload)
        end

        def error(event:, payload: {})
          write(level: :error, event: event, payload: payload)
        end

        # Serialises and logs one entry.
        #
        # @param level [Symbol] logger method; :info is used when the logger
        #   does not respond to it
        # @param event [#to_s] event name
        # @param payload [Hash, Object] extra fields; a non-hash is logged as
        #   { message: payload.to_s }
        # @return [Object, nil] the logger's return value, or nil on any error
        def write(level:, event:, payload: {})
          logger = Rails.logger
          method = logger.respond_to?(level) ? level : :info
          logger.public_send(method, serialize(event: event, payload: payload))
        rescue StandardError
          nil
        end

        private

        # Builds the JSON line for one entry.
        def serialize(event:, payload: {})
          data = {
            ts: Time.current.iso8601(3),
            event: event.to_s,
            pid: Process.pid
          }
          fields = payload.is_a?(Hash) ? payload : { message: payload.to_s }
          fields.each do |key, value|
            next if value.nil?

            # A colliding payload field is kept under a payload_ prefix so the
            # envelope stays trustworthy and no data is lost.
            name = RESERVED_KEYS.include?(key.to_s) ? :"payload_#{key}" : key
            data[name] = value
          end
          generate_json(data)
        end

        # JSON-encodes data, retrying with repaired strings when the first
        # attempt fails on invalid byte sequences (otherwise the entry would be
        # dropped by the rescue in .write).
        def generate_json(data)
          JSON.generate(data)
        rescue JSON::GeneratorError, EncodingError
          JSON.generate(scrub_invalid_bytes(data))
        end

        # Recursively replaces invalid byte sequences in strings with "?".
        def scrub_invalid_bytes(value)
          case value
          when String
            if [ Encoding::UTF_8, Encoding::BINARY ].include?(value.encoding)
              value.dup.force_encoding(Encoding::UTF_8).scrub("?")
            else
              value.encode(Encoding::UTF_8, invalid: :replace, undef: :replace, replace: "?")
            end
          when Hash
            value.each_with_object({}) { |(key, item), out| out[scrub_invalid_bytes(key)] = scrub_invalid_bytes(item) }
          when Array
            value.map { |item| scrub_invalid_bytes(item) }
          else
            value
          end
        end
      end
    end
  end

app/services/person_identity_feedback_service.rb

0.0% lines covered

100.0% branches covered

298 relevant lines. 0 lines covered and 298 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. class PersonIdentityFeedbackService
  2. class FeedbackError < StandardError; end
  3. MAX_LINKED_USERNAMES = 30
  4. FEEDBACK_VERSION = "v1".freeze
  # Records a user confirmation on a person and refreshes identity confidence.
  #
  # Runs under a row lock: increments the confirmation counter, stores the
  # normalised real-person status, optionally replaces the label, and for a
  # "primary_user" adds the profile username to the linked usernames.
  #
  # @param person [InstagramStoryPerson] persisted record to confirm
  # @param label [String, nil] new label; blank values leave the label as is
  # @param real_person_status [String] passed through normalize_real_person_status
  # @return [InstagramStoryPerson] the updated person
  # @raise [FeedbackError] when person is nil or not persisted
  def confirm_person!(person:, label: nil, real_person_status: "confirmed_real_person")
    raise FeedbackError, "Person record is required" unless person&.persisted?

    now = Time.current
    person.with_lock do
      metadata = normalize_metadata(person.metadata)
      feedback = normalize_feedback(metadata)
      feedback["real_person_status"] = normalize_real_person_status(real_person_status)
      feedback["last_action"] = "confirm_person"
      feedback["confirmed_count"] = feedback["confirmed_count"].to_i + 1
      feedback["last_action_at"] = now.iso8601
      feedback["feedback_version"] = FEEDBACK_VERSION
      metadata["user_feedback"] = feedback

      linked = Array(metadata["linked_usernames"]).map { |value| normalize_username(value) }.reject(&:blank?)
      profile_username = normalize_username(person.instagram_profile&.username)
      linked << profile_username if profile_username.present? && person.role.to_s == "primary_user"
      # De-duplicate after appending so an already linked profile username is
      # not stored twice.
      metadata["linked_usernames"] = linked.uniq.first(MAX_LINKED_USERNAMES)

      new_label = label.to_s.strip
      person.label = new_label if new_label.present?
      person.metadata = metadata
      person.save!
      person.sync_identity_confidence!(timestamp: now)
      person
    end
  end
  # Flags a person as an incorrect identification.
  #
  # Under a row lock this records the feedback, disables matching, clears the
  # canonical embedding, demotes a "primary_user" to "unknown", annotates the
  # person's faces and refreshes identity confidence.
  #
  # @param person [InstagramStoryPerson] persisted record
  # @param reason [String, nil] optional explanation stored with the feedback
  # @return [InstagramStoryPerson] the updated person
  # @raise [FeedbackError] when person is nil or not persisted
  def mark_incorrect!(person:, reason: nil)
    raise FeedbackError, "Person record is required" unless person&.persisted?

    timestamp = Time.current
    stated_reason = reason.to_s.strip

    person.with_lock do
      metadata = normalize_metadata(person.metadata)
      feedback = normalize_feedback(metadata)
      feedback.merge!(
        "real_person_status" => "incorrect",
        "last_action" => "mark_incorrect",
        "last_action_at" => timestamp.iso8601
      )
      feedback["incorrect_reason"] = stated_reason if stated_reason.present?
      feedback["feedback_version"] = FEEDBACK_VERSION

      metadata["user_feedback"] = feedback
      metadata["matching_disabled"] = true
      metadata["matching_disabled_reason"] = stated_reason.presence || "marked_incorrect"

      demoted_role = person.role.to_s == "primary_user" ? "unknown" : person.role
      changes = { role: demoted_role, metadata: metadata, canonical_embedding: nil }
      # The vector column is cleared only when the model exposes a writer.
      changes[:canonical_embedding_vector] = nil if person.respond_to?(:canonical_embedding_vector=)
      person.update!(changes)

      annotate_face_feedback!(person: person, status: "incorrect", reason: reason)
      person.sync_identity_confidence!(timestamp: timestamp)
      person
    end
  end
  # Marks a person as the owner ("primary_user") of their Instagram profile.
  #
  # In one transaction, every other primary_user of the same profile is
  # demoted to "secondary_person"; the person is then promoted under a row
  # lock, the profile username is linked, and identity confidence is synced.
  #
  # @param person [InstagramStoryPerson] persisted record with a profile
  # @return [InstagramStoryPerson] the updated person
  # @raise [FeedbackError] when person is missing/unpersisted or has no profile
  def link_profile_owner!(person:)
    raise FeedbackError, "Person record is required" unless person&.persisted?

    profile = person.instagram_profile
    raise FeedbackError, "Profile not found for person" unless profile

    now = Time.current
    InstagramStoryPerson.transaction do
      InstagramStoryPerson
        .where(instagram_profile_id: profile.id, role: "primary_user")
        .where.not(id: person.id)
        .update_all(role: "secondary_person", updated_at: now)

      person.with_lock do
        metadata = normalize_metadata(person.metadata)
        feedback = normalize_feedback(metadata)
        feedback["last_action"] = "link_profile_owner"
        feedback["last_action_at"] = now.iso8601
        feedback["real_person_status"] = "confirmed_real_person"
        feedback["owner_link_confirmed"] = true
        feedback["feedback_version"] = FEEDBACK_VERSION

        linked = Array(metadata["linked_usernames"]).map { |value| normalize_username(value) }.reject(&:blank?)
        profile_username = normalize_username(profile.username)
        linked << profile_username if profile_username.present?
        # De-duplicate after appending so an already linked profile username
        # is not stored twice.
        metadata["linked_usernames"] = linked.uniq.first(MAX_LINKED_USERNAMES)
        metadata["user_feedback"] = feedback

        person.update!(
          role: "primary_user",
          label: person.label.to_s.presence || profile.username.to_s,
          metadata: metadata
        )
        person.sync_identity_confidence!(timestamp: now)
      end
    end
    person
  end
  # Merges source_person into target_person.
  #
  # In one transaction: both rows are locked, the source's post and story
  # faces are re-pointed to the target (taking the target's role), the target
  # receives merged metadata, seen-at range and embedding, and the source is
  # reset to an "unknown" person with matching disabled.
  #
  # @param source_person [InstagramStoryPerson] person being absorbed
  # @param target_person [InstagramStoryPerson] person that survives
  # @return [InstagramStoryPerson] the target person
  # @raise [FeedbackError] when validate_merge! rejects the pair
  def merge_people!(source_person:, target_person:)
    validate_merge!(source_person: source_person, target_person: target_person)

    now = Time.current
    InstagramStoryPerson.transaction do
      # Lock in id order so two concurrent merges of the same pair in opposite
      # directions cannot wait on each other's row locks.
      [ source_person, target_person ].sort_by(&:id).each(&:lock!)

      moved_post_faces = source_person.instagram_post_faces.update_all(
        instagram_story_person_id: target_person.id,
        role: target_person.role.to_s,
        updated_at: now
      )
      moved_story_faces = source_person.instagram_story_faces.update_all(
        instagram_story_person_id: target_person.id,
        role: target_person.role.to_s,
        updated_at: now
      )

      target_metadata = merge_person_metadata!(
        target_person: target_person,
        source_person: source_person,
        moved_post_faces: moved_post_faces,
        moved_story_faces: moved_story_faces,
        merged_at: now
      )
      target_person.update!(
        appearance_count: recompute_appearance_count(target_person),
        first_seen_at: [ target_person.first_seen_at, source_person.first_seen_at ].compact.min,
        last_seen_at: [ target_person.last_seen_at, source_person.last_seen_at ].compact.max,
        canonical_embedding: merged_embedding(target_person: target_person, source_person: source_person).presence,
        metadata: target_metadata
      )
      # The vector column is mirrored only when the model exposes a writer.
      target_person.update_column(:canonical_embedding_vector, target_person.canonical_embedding.presence) if target_person.respond_to?(:canonical_embedding_vector=)
      target_person.sync_identity_confidence!(timestamp: now)

      source_metadata = normalize_metadata(source_person.metadata)
      source_feedback = normalize_feedback(source_metadata)
      source_feedback["last_action"] = "merged_into_person"
      source_feedback["last_action_at"] = now.iso8601
      source_feedback["merged_into_person_id"] = target_person.id
      source_feedback["feedback_version"] = FEEDBACK_VERSION
      source_metadata["user_feedback"] = source_feedback
      source_metadata["merged_into_person_id"] = target_person.id
      source_metadata["merged_at"] = now.iso8601
      source_metadata["matching_disabled"] = true
      source_metadata["matching_disabled_reason"] = "merged_into_#{target_person.id}"
      source_person.update!(
        role: "unknown",
        appearance_count: 0,
        canonical_embedding: nil,
        metadata: source_metadata
      )
      source_person.update_column(:canonical_embedding_vector, nil) if source_person.respond_to?(:canonical_embedding_vector=)
      source_person.sync_identity_confidence!(timestamp: now)
    end
    target_person
  end
  # Splits one face off a person into a newly created "secondary_person".
  #
  # The new person is seeded with the face's embedding. The create, the face
  # re-assignment and the recomputation of the original person run in a single
  # transaction so a failure part-way cannot leave an orphaned person behind.
  #
  # @param person [InstagramStoryPerson] person the face currently belongs to
  # @param face [Object] persisted face record linked to person
  # @return [InstagramStoryPerson] the newly created person
  # @raise [FeedbackError] when either record is missing/unpersisted or the
  #   face is not linked to person
  def separate_face!(person:, face:)
    raise FeedbackError, "Person record is required" unless person&.persisted?
    raise FeedbackError, "Face record is required" unless face&.persisted?
    raise FeedbackError, "Face is not linked to this person" unless face.instagram_story_person_id == person.id

    now = Time.current
    vector = normalize_vector(face.embedding)
    new_metadata = {
      "source" => "user_feedback_split",
      "separated_from_person_id" => person.id,
      "user_feedback" => {
        "real_person_status" => "unverified",
        "last_action" => "separate_face",
        "last_action_at" => now.iso8601,
        "feedback_version" => FEEDBACK_VERSION
      }
    }
    attrs = {
      instagram_account: person.instagram_account,
      instagram_profile: person.instagram_profile,
      role: "secondary_person",
      first_seen_at: now,
      last_seen_at: now,
      appearance_count: 1,
      canonical_embedding: vector.presence,
      metadata: new_metadata
    }
    # The vector column is set only when the model exposes a writer.
    attrs[:canonical_embedding_vector] = vector if person.respond_to?(:canonical_embedding_vector=) && vector.present?

    new_person = InstagramStoryPerson.transaction do
      created = InstagramStoryPerson.create!(attrs)
      update_face_feedback_metadata!(face: face, status: "separated", reason: "split_from_person_#{person.id}", timestamp: now)
      face.update!(
        instagram_story_person: created,
        role: created.role
      )
      recompute_person_after_face_change!(person: person, timestamp: now)
      created.sync_identity_confidence!(timestamp: now)
      created
    end

    person.reload
    new_person
  end
  179. private
  # Guards merge_people!: both records must be persisted, distinct, and belong
  # to the same Instagram profile and account.
  #
  # @raise [FeedbackError] describing the first rule that is violated
  def validate_merge!(source_person:, target_person:)
    raise FeedbackError, "Source person is required" unless source_person&.persisted?
    raise FeedbackError, "Target person is required" unless target_person&.persisted?
    raise FeedbackError, "Source and target person cannot be the same" if source_person.id == target_person.id

    same_owner = %i[instagram_profile_id instagram_account_id].all? do |attribute|
      source_person.public_send(attribute) == target_person.public_send(attribute)
    end
    return if same_owner

    raise FeedbackError, "People can only be merged within the same account/profile"
  end
  # Stamps the same feedback status and reason onto every post face and story
  # face of the person, using a single timestamp for all of them.
  def annotate_face_feedback!(person:, status:, reason:)
    stamped_at = Time.current
    %i[instagram_post_faces instagram_story_faces].each do |association|
      person.public_send(association).find_each do |face|
        update_face_feedback_metadata!(face: face, status: status, reason: reason, timestamp: stamped_at)
      end
    end
    nil
  end
  # Writes a "user_feedback" entry (status, optional reason, timestamp,
  # version) into the face's metadata via update_columns. Best-effort: any
  # error is swallowed and nil is returned.
  def update_face_feedback_metadata!(face:, status:, reason:, timestamp:)
    metadata = normalize_metadata(face.metadata)
    existing = metadata["user_feedback"]
    feedback = existing.is_a?(Hash) ? existing.deep_dup : {}
    note = reason.to_s.strip

    feedback["status"] = status.to_s
    feedback["reason"] = note if note.present?
    feedback["updated_at"] = timestamp.iso8601
    feedback["version"] = FEEDBACK_VERSION
    metadata["user_feedback"] = feedback

    face.update_columns(metadata: metadata, updated_at: timestamp)
  rescue StandardError
    nil
  end
  # Builds the metadata hash the target person should carry after a merge.
  #
  # Nothing is persisted here; the combined hash is returned. It contains the
  # updated "user_feedback" counters, the de-duplicated "linked_usernames" of
  # both people (target's first) and a "merge_history" capped at 40 rows.
  #
  # @param target_person [Object] person receiving the merge
  # @param source_person [Object] person being merged away
  # @param moved_post_faces [#to_i] number of post faces moved
  # @param moved_story_faces [#to_i] number of story faces moved
  # @param merged_at [#iso8601] merge time
  # @return [Hash] merged metadata for the target person
  def merge_person_metadata!(target_person:, source_person:, moved_post_faces:, moved_story_faces:, merged_at:)
    merged = normalize_metadata(target_person.metadata)
    donor = normalize_metadata(source_person.metadata)
    feedback = normalize_feedback(merged)
    donor_feedback = normalize_feedback(donor)

    feedback["last_action"] = "merge_person"
    feedback["last_action_at"] = merged_at.iso8601
    feedback["feedback_version"] = FEEDBACK_VERSION
    feedback["merge_count"] = feedback["merge_count"].to_i + 1
    merged["user_feedback"] = feedback

    clean_usernames = lambda do |raw|
      Array(raw).map { |value| normalize_username(value) }.reject(&:blank?)
    end
    donor_usernames = clean_usernames.call(donor["linked_usernames"])
    own_usernames = clean_usernames.call(merged["linked_usernames"])
    merged["linked_usernames"] = (own_usernames + donor_usernames).uniq.first(MAX_LINKED_USERNAMES)

    history = Array(merged["merge_history"]).select { |row| row.is_a?(Hash) }.first(40)
    entry = {
      "source_person_id" => source_person.id,
      "source_label" => source_person.label.to_s.presence,
      "source_real_person_status" => donor_feedback["real_person_status"].to_s.presence,
      "moved_post_faces" => moved_post_faces.to_i,
      "moved_story_faces" => moved_story_faces.to_i,
      "merged_at" => merged_at.iso8601
    }
    history << entry.compact
    merged["merge_history"] = history.last(40)

    merged
  end
  # Combines the canonical embeddings of two people into one unit vector,
  # weighting each side by its appearance count (minimum weight 1).
  #
  # When only one side has a usable embedding, that embedding is returned.
  # When the two embeddings have different dimensionality they cannot be
  # averaged component-wise, so the target's embedding is kept unchanged.
  # Previously a shorter source raised NoMethodError and a longer one was
  # silently truncated.
  #
  # @param target_person [Object] exposes canonical_embedding and appearance_count
  # @param source_person [Object] exposes canonical_embedding and appearance_count
  # @return [Array<Float>] normalized combined vector, possibly empty
  def merged_embedding(target_person:, source_person:)
    left = normalize_vector(target_person.canonical_embedding)
    right = normalize_vector(source_person.canonical_embedding)
    return left if right.empty?
    return right if left.empty?
    return left unless left.length == right.length

    left_weight = [ target_person.appearance_count.to_i, 1 ].max
    right_weight = [ source_person.appearance_count.to_i, 1 ].max
    total_weight = (left_weight + right_weight).to_f

    combined = left.zip(right).map do |left_value, right_value|
      ((left_value * left_weight) + (right_value * right_weight)) / total_weight
    end
    normalize_vector(combined)
  end
  # Refreshes a person after one of its faces was moved elsewhere.
  #
  # Recounts the remaining faces, records "separate_face_applied" as the last
  # feedback action, clears the canonical embedding when no faces remain, then
  # saves and re-syncs the identity confidence.
  #
  # @param person [Object] person record to refresh
  # @param timestamp [#iso8601] time of the change
  # @return [Object] result of person.sync_identity_confidence!
  def recompute_person_after_face_change!(person:, timestamp:)
    faces_left = recompute_appearance_count(person)

    metadata = normalize_metadata(person.metadata)
    feedback = normalize_feedback(metadata)
    feedback["last_action"] = "separate_face_applied"
    feedback["last_action_at"] = timestamp.iso8601
    feedback["feedback_version"] = FEEDBACK_VERSION
    metadata["user_feedback"] = feedback

    changes = { appearance_count: faces_left, metadata: metadata }
    unless faces_left.positive?
      changes[:canonical_embedding] = nil
      # The vector column is optional, so only clear it when the writer exists.
      changes[:canonical_embedding_vector] = nil if person.respond_to?(:canonical_embedding_vector=)
    end

    person.update!(changes)
    person.sync_identity_confidence!(timestamp: timestamp)
  end
  # Counts the post faces plus story faces attached to a person.
  #
  # @param person [Object] exposes instagram_post_faces and instagram_story_faces
  # @return [Integer] total number of faces, never negative
  def recompute_appearance_count(person)
    total = person.instagram_post_faces.count + person.instagram_story_faces.count
    [ total, 0 ].max
  end
  # Maps a user-supplied status token to its stored form.
  #
  # Blank input defaults to "confirmed_real_person"; the shorthands
  # "confirmed" and "likely" are expanded; anything else is returned stripped.
  #
  # @param value [#to_s]
  # @return [String]
  def normalize_real_person_status(value)
    token = value.to_s.strip.presence || "confirmed_real_person"
    case token
    when "confirmed" then "confirmed_real_person"
    when "likely" then "likely_real_person"
    else token
    end
  end
  # Returns a deep copy of a metadata hash, or an empty hash for non-Hash input.
  #
  # @param value [Object]
  # @return [Hash]
  def normalize_metadata(value)
    return {} unless value.is_a?(Hash)

    value.deep_dup
  end
  # Extracts a deep copy of metadata["user_feedback"], or an empty hash when
  # that entry is missing or not a Hash.
  #
  # @param metadata [Hash]
  # @return [Hash]
  def normalize_feedback(metadata)
    feedback = metadata["user_feedback"]
    return {} unless feedback.is_a?(Hash)

    feedback.deep_dup
  end
  # Canonicalizes a username: trims, lowercases, drops one leading "@" and
  # removes every character outside a-z, 0-9, "." and "_".
  #
  # @param value [#to_s]
  # @return [String, nil] the cleaned handle, or nil when the input is blank or
  #   the result is not 2..30 characters long
  def normalize_username(value)
    handle = value.to_s.strip.downcase
    return nil if handle.blank?

    handle = handle.delete_prefix("@").gsub(/[^a-z0-9._]/, "")
    handle if handle.length.between?(2, 30)
  end
  # Scales a numeric vector to unit (L2) length.
  #
  # Returns an empty array when the input is empty, has zero magnitude, or
  # contains NaN/Infinity. A non-finite norm used to pass the `<= 0.0` check
  # and yield a vector of NaN values.
  #
  # @param values [Array<#to_f>, nil]
  # @return [Array<Float>] unit-length vector, or [] when it cannot be normalized
  def normalize_vector(values)
    vector = Array(values).map(&:to_f)
    return [] if vector.empty?

    norm = Math.sqrt(vector.sum { |value| value * value })
    return [] unless norm.finite? && norm.positive?

    vector.map { |value| value / norm }
  end
  298. end

app/services/personalization_engine.rb

0.0% lines covered

100.0% branches covered

39 relevant lines. 0 lines covered and 39 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. class PersonalizationEngine
  2. DEFAULT_PROFILE = {
  3. tone: "friendly",
  4. interests: [],
  5. emoji_style: "moderate",
  6. engagement_style: "supportive"
  7. }.freeze
  8. def build(profile:)
  9. behavior = profile.instagram_profile_behavior_profile
  10. summary = behavior&.behavioral_summary.is_a?(Hash) ? behavior.behavioral_summary : {}
  11. interests = summary.fetch("content_categories", {}).to_h.keys.first(8)
  12. {
  13. tone: infer_tone(summary),
  14. interests: interests,
  15. emoji_style: infer_emoji_style(summary),
  16. engagement_style: infer_engagement_style(summary)
  17. }
  18. rescue StandardError
  19. DEFAULT_PROFILE
  20. end
  21. private
  22. def infer_tone(summary)
  23. sentiment = summary.fetch("sentiment_trend", {}).to_h.max_by { |_key, value| value.to_i }&.first.to_s
  24. return "optimistic" if sentiment == "positive"
  25. return "calm" if sentiment == "neutral"
  26. return "empathetic" if sentiment == "negative"
  27. "friendly"
  28. end
  29. def infer_emoji_style(summary)
  30. tag_count = summary.fetch("top_hashtags", {}).to_h.values.sum(&:to_i)
  31. return "light" if tag_count < 5
  32. return "moderate" if tag_count < 25
  33. "expressive"
  34. end
  35. def infer_engagement_style(summary)
  36. recurring = summary.fetch("frequent_secondary_persons", []).size
  37. recurring >= 3 ? "community" : "supportive"
  38. end
  39. end

app/services/pipeline/account_processing_coordinator.rb

0.0% lines covered

100.0% branches covered

211 relevant lines. 0 lines covered and 211 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. module Pipeline
  2. class AccountProcessingCoordinator
  3. STORY_SYNC_INTERVAL = 90.minutes
  4. FEED_SYNC_INTERVAL = 2.hours
  5. PROFILE_SCAN_INTERVAL = 75.minutes
  6. FALLBACK_PROFILE_REFRESH_INTERVAL = 45.minutes
  # @param account [Object] account whose continuous processing is coordinated
  # @param trigger_source [#to_s] label of what triggered this run; blank
  #   values are stored as "unspecified"
  # @param now [Time] reference time used for due checks and next-run times
  def initialize(account:, trigger_source:, now: Time.current)
    source_label = trigger_source.to_s

    @account = account
    @trigger_source = source_label.present? ? source_label : "unspecified"
    @now = now
  end
  # Performs one scheduling pass for the account.
  #
  # For each of story sync, feed sync and profile scan that is due, the job is
  # enqueued when the local AI health check reports ok. Otherwise story and
  # feed sync are recorded as skipped, and the profile scan is replaced by a
  # profile refresh fallback on a shorter interval. Workspace actions are
  # refreshed on every pass, then the heartbeat and next-run times are saved.
  #
  # @return [Hash] run statistics (:trigger_source, :started_at,
  #   :local_ai_health, :enqueued_jobs, :skipped_jobs, :finished_at)
  def run!
    stats = {
      trigger_source: @trigger_source,
      started_at: @now.iso8601(3),
      enqueued_jobs: [],
      skipped_jobs: []
    }

    health = Ops::LocalAiHealth.check
    stats[:local_ai_health] = health
    ai_available = health_ok?(health)
    skip_for_unhealthy_ai = lambda do |job_name|
      stats[:skipped_jobs] << { job: job_name, reason: "local_ai_unhealthy" }
    end

    if due_for_story_sync?
      if ai_available
        enqueue_story_sync!(stats)
        @account.continuous_processing_next_story_sync_at = next_time(STORY_SYNC_INTERVAL)
      else
        skip_for_unhealthy_ai.call("SyncHomeStoryCarouselJob")
      end
    end

    if due_for_feed_sync?
      if ai_available
        enqueue_feed_engagement!(stats)
        @account.continuous_processing_next_feed_sync_at = next_time(FEED_SYNC_INTERVAL)
      else
        skip_for_unhealthy_ai.call("AutoEngageHomeFeedJob")
      end
    end

    if due_for_profile_scan?
      if ai_available
        enqueue_profile_scan!(stats)
        @account.continuous_processing_next_profile_scan_at = next_time(PROFILE_SCAN_INTERVAL)
      else
        # Without local AI a plain profile refresh runs instead of the scan.
        enqueue_profile_refresh_fallback!(stats)
        @account.continuous_processing_next_profile_scan_at = next_time(FALLBACK_PROFILE_REFRESH_INTERVAL)
      end
    end

    enqueue_workspace_actions!(stats)

    @account.update!(
      continuous_processing_last_heartbeat_at: Time.current,
      continuous_processing_next_story_sync_at: @account.continuous_processing_next_story_sync_at,
      continuous_processing_next_feed_sync_at: @account.continuous_processing_next_feed_sync_at,
      continuous_processing_next_profile_scan_at: @account.continuous_processing_next_profile_scan_at
    )

    stats[:finished_at] = Time.current.iso8601(3)
    stats
  end
  56. private
  # True when the account's next story sync time is unset or has passed.
  def due_for_story_sync?
    next_run_at = @account.continuous_processing_next_story_sync_at
    due?(next_run_at)
  end

  # True when the account's next feed sync time is unset or has passed.
  def due_for_feed_sync?
    next_run_at = @account.continuous_processing_next_feed_sync_at
    due?(next_run_at)
  end

  # True when the account's next profile scan time is unset or has passed.
  def due_for_profile_scan?
    next_run_at = @account.continuous_processing_next_profile_scan_at
    due?(next_run_at)
  end

  # @param timestamp [Time, nil] scheduled time
  # @return [Boolean] true for a blank timestamp or one at/before @now
  def due?(timestamp)
    return true if timestamp.blank?

    timestamp <= @now
  end
  # Reads the :ok flag of a health payload and casts it with
  # ActiveModel::Type::Boolean; anything that is not a Hash counts as false.
  #
  # @param health [Hash, Object]
  # @return [Boolean, nil] nil when the Hash has no :ok value
  def health_ok?(health)
    flag = health.is_a?(Hash) ? health[:ok] : false
    ActiveModel::Type::Boolean.new.cast(flag)
  end
  # Enqueues SyncHomeStoryCarouselJob for the account, records it in
  # stats[:enqueued_jobs] and logs the enqueue.
  #
  # @param stats [Hash] run statistics being accumulated
  def enqueue_story_sync!(stats)
    story_limit = SyncHomeStoryCarouselJob::STORY_BATCH_LIMIT
    job = SyncHomeStoryCarouselJob.perform_later(
      instagram_account_id: @account.id,
      story_limit: story_limit,
      auto_reply_only: false
    )

    entry = {
      job: "SyncHomeStoryCarouselJob",
      active_job_id: job.job_id,
      queue: job.queue_name,
      story_limit: story_limit
    }
    stats[:enqueued_jobs] << entry

    log_payload = {
      account_id: @account.id,
      active_job_id: job.job_id,
      trigger_source: @trigger_source
    }
    Ops::StructuredLogger.info(event: "continuous_processing.story_sync_enqueued", payload: log_payload)
  end
  # Enqueues AutoEngageHomeFeedJob (two posts, no story) for the account,
  # records it in stats[:enqueued_jobs] and logs the enqueue.
  #
  # @param stats [Hash] run statistics being accumulated
  def enqueue_feed_engagement!(stats)
    max_posts = 2
    job = AutoEngageHomeFeedJob.perform_later(
      instagram_account_id: @account.id,
      max_posts: max_posts,
      include_story: false,
      story_hold_seconds: 18
    )

    entry = {
      job: "AutoEngageHomeFeedJob",
      active_job_id: job.job_id,
      queue: job.queue_name,
      max_posts: max_posts
    }
    stats[:enqueued_jobs] << entry

    log_payload = {
      account_id: @account.id,
      active_job_id: job.job_id,
      trigger_source: @trigger_source
    }
    Ops::StructuredLogger.info(event: "continuous_processing.feed_engagement_enqueued", payload: log_payload)
  end
  # Enqueues EnqueueRecentProfilePostScansForAccountJob for the account,
  # records it (with its limits) in stats[:enqueued_jobs] and logs the enqueue.
  #
  # @param stats [Hash] run statistics being accumulated
  def enqueue_profile_scan!(stats)
    limits = { limit_per_account: 6, posts_limit: 3, comments_limit: 8 }
    job = EnqueueRecentProfilePostScansForAccountJob.perform_later(
      instagram_account_id: @account.id,
      **limits
    )

    stats[:enqueued_jobs] << {
      job: "EnqueueRecentProfilePostScansForAccountJob",
      active_job_id: job.job_id,
      queue: job.queue_name,
      **limits
    }

    log_payload = {
      account_id: @account.id,
      active_job_id: job.job_id,
      trigger_source: @trigger_source
    }
    Ops::StructuredLogger.info(event: "continuous_processing.profile_scan_enqueued", payload: log_payload)
  end
  # Enqueues SyncNextProfilesForAccountJob for the account, records it with
  # fallback_reason "local_ai_unhealthy" and logs a warning.
  #
  # @param stats [Hash] run statistics being accumulated
  def enqueue_profile_refresh_fallback!(stats)
    limit = 10
    job = SyncNextProfilesForAccountJob.perform_later(
      instagram_account_id: @account.id,
      limit: limit
    )

    entry = {
      job: "SyncNextProfilesForAccountJob",
      active_job_id: job.job_id,
      queue: job.queue_name,
      limit: limit,
      fallback_reason: "local_ai_unhealthy"
    }
    stats[:enqueued_jobs] << entry

    log_payload = {
      account_id: @account.id,
      active_job_id: job.job_id,
      trigger_source: @trigger_source
    }
    Ops::StructuredLogger.warn(event: "continuous_processing.profile_refresh_fallback_enqueued", payload: log_payload)
  end
  # Refreshes the workspace actions queue through
  # Workspace::ActionsTodoQueueService and records its counters.
  #
  # Failures do not propagate: any StandardError is recorded in
  # stats[:skipped_jobs] and logged as a warning.
  #
  # @param stats [Hash] run statistics being accumulated
  def enqueue_workspace_actions!(stats)
    service = Workspace::ActionsTodoQueueService.new(
      account: @account,
      limit: 40,
      enqueue_processing: true
    )
    result = service.fetch!
    queue_stats = result[:stats].is_a?(Hash) ? result[:stats] : {}

    counts = {
      queued_now: queue_stats[:enqueued_now].to_i,
      ready_items: queue_stats[:ready_items].to_i,
      processing_items: queue_stats[:processing_items].to_i,
      total_items: queue_stats[:total_items].to_i
    }

    stats[:enqueued_jobs] << {
      job: "Workspace::ActionsTodoQueueService",
      source: "continuous_processing",
      **counts
    }
    Ops::StructuredLogger.info(
      event: "continuous_processing.workspace_actions_refreshed",
      payload: { account_id: @account.id, trigger_source: @trigger_source, **counts }
    )
  rescue StandardError => e
    failure_class = e.class.name
    stats[:skipped_jobs] << {
      job: "Workspace::ActionsTodoQueueService",
      reason: "workspace_queue_refresh_failed",
      error_class: failure_class
    }
    Ops::StructuredLogger.warn(
      event: "continuous_processing.workspace_actions_refresh_failed",
      payload: {
        account_id: @account.id,
        trigger_source: @trigger_source,
        error_class: failure_class,
        # Error text is capped at 280 bytes in the log payload.
        error_message: e.message.to_s.byteslice(0, 280)
      }
    )
  end
  # @param interval [#to_i] base interval in seconds
  # @return [Time] @now plus the interval with random jitter added
  def next_time(interval)
    delay = jitter(interval)
    @now + delay
  end

  # Adds a random extra of 0..12% (whole seconds) to an interval.
  #
  # @param interval [#to_i] base interval in seconds
  # @return [Integer] jittered number of seconds
  def jitter(interval)
    base_seconds = interval.to_i
    max_extra = (base_seconds * 0.12).to_i
    base_seconds + rand(0..max_extra)
  end
  210. end
  211. end

app/services/post_face_recognition_service.rb

0.0% lines covered

100.0% branches covered

356 relevant lines. 0 lines covered and 356 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. class PostFaceRecognitionService
  2. DEFAULT_MATCH_MIN_CONFIDENCE = ENV.fetch("POST_FACE_MATCH_MIN_CONFIDENCE", "0.78").to_f
  # @param face_detection_service [#detect]
  # @param face_embedding_service [#embed]
  # @param vector_matching_service [#match_or_create!]
  # @param face_identity_resolution_service [#resolve_for_post!]
  # @param match_min_confidence [#to_f, nil] minimum detector confidence for
  #   linking a face; nil, negative or unconvertible values fall back to
  #   DEFAULT_MATCH_MIN_CONFIDENCE
  def initialize(
    face_detection_service: FaceDetectionService.new,
    face_embedding_service: FaceEmbeddingService.new,
    vector_matching_service: VectorMatchingService.new,
    face_identity_resolution_service: FaceIdentityResolutionService.new,
    match_min_confidence: nil
  )
    @face_detection_service = face_detection_service
    @face_embedding_service = face_embedding_service
    @vector_matching_service = vector_matching_service
    @face_identity_resolution_service = face_identity_resolution_service
    @match_min_confidence =
      begin
        threshold =
          if match_min_confidence.nil?
            DEFAULT_MATCH_MIN_CONFIDENCE
          else
            match_min_confidence.to_f
          end
        threshold.negative? ? DEFAULT_MATCH_MIN_CONFIDENCE : threshold
      rescue StandardError
        DEFAULT_MATCH_MIN_CONFIDENCE
      end
  end
  # Runs face recognition for a post and stores the outcome.
  #
  # Loads an image for the post, runs face detection, replaces the post's
  # face rows, links each sufficiently confident face to a person through the
  # vector matching service, writes a "face_recognition" summary into the
  # post metadata and finally runs identity resolution.
  #
  # @param post [Object, nil] post record with attached media
  # @return [Hash] `skipped: true` plus a :reason when nothing was processed
  #   (missing post/media, unusable media, detection failure, or any rescued
  #   error); otherwise `skipped: false` with face counts, matched people and
  #   the identity resolution result
  21. def process!(post:)
  22. return { skipped: true, reason: "post_missing" } unless post
  23. return { skipped: true, reason: "media_missing" } unless post.media.attached?
  24. source_payload = load_face_detection_payload(post: post)
  # No usable image: record why in the metadata and return the loader's payload as-is.
  25. if source_payload[:skipped]
  26. persist_face_recognition_metadata!(
  27. post: post,
  28. attributes: {
  29. "face_count" => post.instagram_post_faces.count,
  30. "matched_people" => [],
  31. "detection_source" => source_payload[:detection_source].to_s.presence || source_payload[:content_type].to_s.presence || "unknown",
  32. "detection_reason" => source_payload[:reason].to_s.presence || "face_detection_skipped",
  33. "detection_error" => source_payload[:error].to_s.presence,
  34. "updated_at" => Time.current.iso8601
  35. }.compact
  36. )
  37. return source_payload
  38. end
  39. image_bytes = source_payload[:image_bytes]
  40. detection = @face_detection_service.detect(
  41. media_payload: {
  42. story_id: "post:#{post.id}",
  43. image_bytes: image_bytes
  44. }
  45. )
  # Detection metadata is read with both symbol and string keys.
  46. detection_metadata = detection[:metadata].is_a?(Hash) ? detection[:metadata] : {}
  47. detection_reason = detection_metadata[:reason].to_s.presence || detection_metadata["reason"].to_s.presence
  48. detection_error = detection_metadata[:error_message].to_s.presence || detection_metadata["error_message"].to_s.presence
  # A reason in the detection metadata is treated as a failed detection;
  # existing face rows are left untouched in that case.
  49. if detection_reason.present?
  50. persist_face_recognition_metadata!(
  51. post: post,
  52. attributes: {
  53. "face_count" => post.instagram_post_faces.count,
  54. "matched_people" => [],
  55. "detection_source" => source_payload[:detection_source],
  56. "detection_reason" => detection_reason,
  57. "detection_error" => detection_error,
  58. "detection_warnings" => Array(detection_metadata[:warnings] || detection_metadata["warnings"]).first(20),
  59. "updated_at" => Time.current.iso8601
  60. }.compact
  61. )
  62. return {
  63. skipped: true,
  64. reason: "face_detection_failed",
  65. detection_reason: detection_reason,
  66. detection_error: detection_error
  67. }
  68. end
  # Existing face rows are removed before being rebuilt from this detection run.
  69. post.instagram_post_faces.delete_all
  70. matches = []
  71. linked_face_count = 0
  72. low_confidence_filtered_count = 0
  73. Array(detection[:faces]).each_with_index do |face, index|
  74. observation_signature = face_observation_signature(
  75. post: post,
  76. face: face,
  77. index: index,
  78. detection_source: source_payload[:detection_source]
  79. )
  80. confidence = face[:confidence].to_f
  # Faces below the minimum confidence are stored unlinked and not embedded.
  81. unless linkable_face_confidence?(confidence)
  82. low_confidence_filtered_count += 1
  83. persist_unlinked_face!(
  84. post: post,
  85. face: face,
  86. observation_signature: observation_signature,
  87. source: source_payload[:detection_source],
  88. reason: "low_confidence"
  89. )
  90. next
  91. end
  92. embedding_payload = @face_embedding_service.embed(
  93. media_payload: {
  94. story_id: "post:#{post.id}",
  95. media_type: "image",
  96. image_bytes: image_bytes
  97. },
  98. face: face
  99. )
  100. vector = Array(embedding_payload[:vector]).map(&:to_f)
  # Without an embedding the face cannot be matched; it is stored unlinked.
  101. if vector.empty?
  102. persist_unlinked_face!(
  103. post: post,
  104. face: face,
  105. observation_signature: observation_signature,
  106. source: source_payload[:detection_source],
  107. reason: "embedding_unavailable"
  108. )
  109. next
  110. end
  111. match = @vector_matching_service.match_or_create!(
  112. account: post.instagram_account,
  113. profile: post.instagram_profile,
  114. embedding: vector,
  115. occurred_at: post.taken_at || Time.current,
  116. observation_signature: observation_signature
  117. )
  118. person = match[:person]
  119. update_person_face_attributes!(person: person, face: face)
  120. post.instagram_post_faces.create!(
  121. instagram_story_person: person,
  122. role: match[:role].to_s.presence || "unknown",
  123. detector_confidence: confidence,
  124. match_similarity: match[:similarity],
  125. embedding_version: embedding_payload[:version].to_s,
  126. embedding: vector,
  127. bounding_box: face[:bounding_box],
  128. metadata: face_record_metadata(
  129. source: source_payload[:detection_source],
  130. face: face,
  131. observation_signature: observation_signature,
  132. link_status: "matched"
  133. )
  134. )
  135. linked_face_count += 1
  136. matches << {
  137. person_id: person.id,
  138. role: match[:role],
  139. label: person.label,
  140. similarity: match[:similarity],
  141. owner_match: match[:role].to_s == "primary_user",
  142. recurring_face: person.appearance_count.to_i > 1,
  143. appearances: person.appearance_count.to_i,
  144. real_person_status: person.real_person_status,
  145. identity_confidence: person.identity_confidence
  146. }.compact
  147. end
  148. total_detected_faces = Array(detection[:faces]).length
  # Summary of this run, merged into the post's "face_recognition" metadata.
  149. persist_face_recognition_metadata!(
  150. post: post,
  151. attributes: {
  152. "face_count" => total_detected_faces,
  153. "linked_face_count" => linked_face_count,
  154. "unlinked_face_count" => [ total_detected_faces - linked_face_count, 0 ].max,
  155. "low_confidence_filtered_count" => low_confidence_filtered_count,
  156. "min_match_confidence" => @match_min_confidence.round(3),
  157. "matched_people" => matches,
  158. "detection_source" => source_payload[:detection_source],
  159. "ocr_text" => detection[:ocr_text].to_s,
  160. "objects" => Array(detection[:content_signals]),
  161. "hashtags" => Array(detection[:hashtags]),
  162. "mentions" => Array(detection[:mentions]),
  163. "profile_handles" => Array(detection[:profile_handles]),
  164. "detection_warnings" => Array(detection_metadata[:warnings] || detection_metadata["warnings"]).first(20),
  165. "updated_at" => Time.current.iso8601
  166. }.compact
  167. )
  # Username candidates: detected mentions and handles plus @handles found in the OCR text.
  168. identity_resolution = @face_identity_resolution_service.resolve_for_post!(
  169. post: post,
  170. extracted_usernames: (
  171. Array(detection[:mentions]) +
  172. Array(detection[:profile_handles]) +
  173. detection[:ocr_text].to_s.scan(/@[a-zA-Z0-9._]{2,30}/)
  174. ),
  175. content_summary: detection
  176. )
  177. if identity_resolution.is_a?(Hash) && identity_resolution[:summary].is_a?(Hash)
  178. persist_face_recognition_metadata!(
  179. post: post,
  180. attributes: {
  181. "identity" => identity_resolution[:summary],
  182. "participant_summary" => identity_resolution[:summary][:participant_summary_text].to_s
  183. }
  184. )
  185. end
  186. {
  187. skipped: false,
  188. face_count: total_detected_faces,
  189. linked_face_count: linked_face_count,
  190. low_confidence_filtered_count: low_confidence_filtered_count,
  191. matched_people: matches,
  192. identity_resolution: identity_resolution
  193. }
  # Any failure above is recorded on the post (when it is persisted) and
  # returned as a skipped result instead of being raised.
  194. rescue StandardError => e
  195. if post&.persisted?
  196. persist_face_recognition_metadata!(
  197. post: post,
  198. attributes: {
  199. "face_count" => post.instagram_post_faces.count,
  200. "matched_people" => [],
  201. "detection_source" => "post_face_recognition",
  202. "detection_reason" => "recognition_error",
  203. "detection_error" => e.message.to_s,
  204. "updated_at" => Time.current.iso8601
  205. }
  206. )
  207. end
  208. {
  209. skipped: true,
  210. reason: "recognition_error",
  211. error: e.message.to_s
  212. }
  213. end
  214. private
  # Merges attributes into post.metadata["face_recognition"] while holding a
  # row lock on the post. Nil-valued attributes are dropped before merging.
  #
  # Best-effort: any StandardError is swallowed and nil is returned.
  #
  # @param post [Object] post record
  # @param attributes [#to_h] values to merge into the existing entry
  # @return [Object, nil]
  def persist_face_recognition_metadata!(post:, attributes:)
    post.with_lock do
      post.reload
      metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
      existing = metadata["face_recognition"]
      recognition = existing.is_a?(Hash) ? existing.deep_dup : {}
      metadata["face_recognition"] = recognition.merge(attributes.to_h.compact)
      post.update!(metadata: metadata)
    end
  rescue StandardError
    nil
  end
  # Picks the image bytes that face detection should run on.
  #
  # Images are used directly. Videos use the attached preview image when
  # present, otherwise a generated preview (limited to 960x960). Every other
  # content type is skipped.
  #
  # @param post [Object] post with media (and optionally preview_image) attachments
  # @return [Hash] `skipped: false` with :image_bytes, :detection_source and
  #   :content_type, or `skipped: true` with a :reason ("video_preview_unavailable",
  #   "unsupported_content_type" or "media_load_error")
  def load_face_detection_payload(post:)
    content_type = post.media.blob&.content_type.to_s

    case content_type
    when %r{\Aimage/}
      {
        skipped: false,
        image_bytes: post.media.download,
        detection_source: "post_media_image",
        content_type: content_type
      }
    when %r{\Avideo/}
      if post.preview_image.attached?
        {
          skipped: false,
          image_bytes: post.preview_image.download,
          detection_source: "post_preview_image",
          content_type: post.preview_image.blob&.content_type.to_s
        }
      else
        begin
          generated_preview = post.media.preview(resize_to_limit: [ 960, 960 ]).processed
          preview_blob = generated_preview.respond_to?(:image) ? generated_preview.image : nil
          {
            skipped: false,
            image_bytes: generated_preview.download,
            detection_source: "post_generated_video_preview",
            content_type: preview_blob&.content_type.to_s.presence || "image/jpeg"
          }
        rescue StandardError
          { skipped: true, reason: "video_preview_unavailable", content_type: content_type }
        end
      end
    else
      { skipped: true, reason: "unsupported_content_type", content_type: content_type }
    end
  rescue StandardError => e
    {
      skipped: true,
      reason: "media_load_error",
      error: e.message.to_s,
      content_type: content_type.to_s
    }
  end
  # Builds a colon-separated signature string for one detected face:
  # "post", post id, detection source, face index, then the bounding box
  # values stored under the string keys "x1", "y1", "x2", "y2".
  #
  # Missing values render as empty segments.
  #
  # @param post [#id]
  # @param face [Hash] face payload; :bounding_box is used when it is a Hash
  # @param index [#to_i] position of the face in the detection result
  # @param detection_source [#to_s]
  # @return [String]
  def face_observation_signature(post:, face:, index:, detection_source:)
    box = face[:bounding_box]
    box = {} unless box.is_a?(Hash)
    corners = %w[x1 y1 x2 y2].map { |corner| box[corner] }

    parts = [ "post", post.id, detection_source.to_s, index.to_i ] + corners
    parts.map(&:to_s).join(":")
  end
  # @param confidence [#to_f] detector confidence of a face
  # @return [Boolean] true when the confidence reaches the configured minimum
  def linkable_face_confidence?(confidence)
    minimum = @match_min_confidence
    confidence.to_f >= minimum
  end
  # Stores a detected face that is not linked to any person, recording why
  # linking was skipped in the face metadata.
  #
  # Best-effort: any StandardError is swallowed and nil is returned.
  #
  # @param post [Object] post owning the face rows
  # @param face [Hash] detected face payload
  # @param observation_signature [String]
  # @param source [Object] detection source label
  # @param reason [String] why the face was not linked
  # @return [Object, nil] the created face row, or nil on failure
  def persist_unlinked_face!(post:, face:, observation_signature:, source:, reason:)
    metadata = face_record_metadata(
      source: source,
      face: face,
      observation_signature: observation_signature,
      link_status: "unlinked",
      link_skip_reason: reason
    )
    attributes = {
      instagram_story_person: nil,
      role: "unknown",
      detector_confidence: face[:confidence].to_f,
      match_similarity: nil,
      embedding_version: nil,
      embedding: nil,
      bounding_box: face[:bounding_box],
      metadata: metadata
    }
    post.instagram_post_faces.create!(attributes)
  rescue StandardError
    nil
  end
  # Assembles the metadata hash stored on a face row. Nil values are removed;
  # :gender_score is always present because it is coerced with to_f.
  #
  # @param source [Object] detection source label
  # @param face [Hash] detected face payload (symbol keys)
  # @param observation_signature [String]
  # @param link_status [String]
  # @param link_skip_reason [String, nil]
  # @return [Hash{Symbol=>Object}]
  def face_record_metadata(source:, face:, observation_signature:, link_status:, link_skip_reason: nil)
    record = { source: source }
    %i[landmarks likelihoods age age_range gender].each { |key| record[key] = face[key] }
    record[:gender_score] = face[:gender_score].to_f
    record[:observation_signature] = observation_signature
    record[:link_status] = link_status
    record[:link_skip_reason] = link_skip_reason
    record.compact
  end
  # Folds the attributes of one detected face (gender, age range, age) into
  # the running "face_attributes" statistics kept in the person's metadata.
  #
  # Age samples form a rolling window of the 20 most recent observations.
  # The window used to keep the oldest 19 samples, so new observations never
  # displaced old ones and the estimate stopped tracking recent data.
  #
  # Best-effort: the write uses update_columns, and any StandardError is
  # swallowed with nil returned.
  #
  # @param person [Object, nil] person record; nothing happens when nil
  # @param face [Hash] detected face payload (:gender, :age_range, :age)
  # @return [Object, nil]
  def update_person_face_attributes!(person:, face:)
    return unless person

    max_age_samples = 20
    metadata = person.metadata.is_a?(Hash) ? person.metadata.deep_dup : {}
    attrs = metadata["face_attributes"].is_a?(Hash) ? metadata["face_attributes"].deep_dup : {}

    gender = face[:gender].to_s.strip.downcase
    if gender.present?
      gender_counts = attrs["gender_counts"].is_a?(Hash) ? attrs["gender_counts"].deep_dup : {}
      gender_counts[gender] = gender_counts[gender].to_i + 1
      attrs["gender_counts"] = gender_counts
      attrs["primary_gender_cue"] = gender_counts.max_by { |_key, count| count.to_i }&.first
    end

    age_range = face[:age_range].to_s.strip
    if age_range.present?
      age_counts = attrs["age_range_counts"].is_a?(Hash) ? attrs["age_range_counts"].deep_dup : {}
      age_counts[age_range] = age_counts[age_range].to_i + 1
      attrs["age_range_counts"] = age_counts
      attrs["primary_age_range"] = age_counts.max_by { |_key, count| count.to_i }&.first
    end

    age_value = face[:age].to_f
    if age_value.positive?
      # Keep the newest samples so the appended one completes the window.
      samples = Array(attrs["age_samples"]).map(&:to_f).last(max_age_samples - 1)
      samples << age_value.round(1)
      attrs["age_samples"] = samples
      attrs["age_estimate"] = (samples.sum / samples.length.to_f).round(1)
    end

    attrs["last_observed_at"] = Time.current.iso8601
    metadata["face_attributes"] = attrs
    person.update_columns(metadata: metadata, updated_at: Time.current)
  rescue StandardError
    nil
  end
  356. end

app/services/post_video_context_extraction_service.rb

0.0% lines covered

100.0% branches covered

356 relevant lines. 0 lines covered and 356 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. class PostVideoContextExtractionService
  2. MAX_VIDEO_BYTES = ENV.fetch("POST_VIDEO_CONTEXT_MAX_BYTES", 35 * 1024 * 1024).to_i
  3. MAX_DYNAMIC_INTELLIGENCE_BYTES = ENV.fetch("POST_VIDEO_DYNAMIC_INTELLIGENCE_MAX_BYTES", 20 * 1024 * 1024).to_i
  4. MAX_AUDIO_EXTRACTION_BYTES = ENV.fetch("POST_VIDEO_AUDIO_MAX_BYTES", 30 * 1024 * 1024).to_i
  5. MAX_AUDIO_DURATION_SECONDS = ENV.fetch("POST_VIDEO_AUDIO_MAX_DURATION_SECONDS", 180).to_i
  6. TRANSCRIPT_MAX_CHARS = ENV.fetch("POST_VIDEO_TRANSCRIPT_MAX_CHARS", 420).to_i
  7. TOPIC_LIMIT = ENV.fetch("POST_VIDEO_TOPIC_LIMIT", 30).to_i
  8. SIGNAL_LIMIT = ENV.fetch("POST_VIDEO_SIGNAL_LIMIT", 40).to_i
  # Collaborators are injected so they can be swapped for other
  # implementations; each defaults to a new instance of the named service.
  #
  # @param video_frame_change_detector_service [#classify]
  # @param video_metadata_service [#probe]
  # @param video_audio_extraction_service [Object]
  # @param speech_transcription_service [Object]
  # @param local_microservice_client [Object]
  # @param content_understanding_service [#build]
  def initialize(
    video_frame_change_detector_service: VideoFrameChangeDetectorService.new,
    video_metadata_service: VideoMetadataService.new,
    video_audio_extraction_service: VideoAudioExtractionService.new,
    speech_transcription_service: SpeechTranscriptionService.new,
    local_microservice_client: Ai::LocalMicroserviceClient.new,
    content_understanding_service: StoryContentUnderstandingService.new
  )
    # Video inspection
    @video_frame_change_detector_service = video_frame_change_detector_service
    @video_metadata_service = video_metadata_service

    # Audio and transcription
    @video_audio_extraction_service = video_audio_extraction_service
    @speech_transcription_service = speech_transcription_service

    # Content analysis
    @local_microservice_client = local_microservice_client
    @content_understanding_service = content_understanding_service
  end
  # Extracts contextual signals (topics, objects, hashtags, mentions, OCR
  # text, transcript) from a video.
  #
  # @param video_bytes [#to_s] raw video content
  # @param reference_id [#to_s] identifier passed along to the collaborating services
  # @param content_type [String] content type of the video
  # @return [Hash] `skipped: false` with the extracted context and per-stage
  #   metadata, or the result of skipped_result when the bytes are missing,
  #   exceed MAX_VIDEO_BYTES, or any StandardError is raised
  24. def extract(video_bytes:, reference_id:, content_type:)
  25. bytes = video_bytes.to_s.b
  26. return skipped_result(reason: "video_bytes_missing") if bytes.blank?
  27. if bytes.bytesize > MAX_VIDEO_BYTES
  28. return skipped_result(
  29. reason: "video_too_large_for_context_extraction",
  30. byte_size: bytes.bytesize,
  31. max_bytes: MAX_VIDEO_BYTES
  32. )
  33. end
  34. mode = @video_frame_change_detector_service.classify(
  35. video_bytes: bytes,
  36. reference_id: reference_id.to_s,
  37. content_type: content_type
  38. )
  # A video classified as "static_image" is routed as an image; a missing
  # processing mode defaults to "dynamic_video".
  39. processing_mode = mode[:processing_mode].to_s.presence || "dynamic_video"
  40. static_video = processing_mode == "static_image"
  41. semantic_route = static_video ? "image" : "video"
  42. probe = build_probe(
  43. bytes: bytes,
  44. reference_id: reference_id,
  45. content_type: content_type,
  46. mode: mode
  47. )
  48. duration_seconds = probe[:duration_seconds]
  49. probe_metadata = probe[:metadata].is_a?(Hash) ? probe[:metadata] : {}
  # The has_audio flag is read with either a string or a symbol key.
  50. has_audio = ActiveModel::Type::Boolean.new.cast(probe_metadata["has_audio"] || probe_metadata[:has_audio])
  51. audio = extract_audio_if_allowed(
  52. bytes: bytes,
  53. reference_id: reference_id,
  54. content_type: content_type,
  55. duration_seconds: duration_seconds,
  56. has_audio: has_audio
  57. )
  58. transcript = transcribe_audio_if_available(audio: audio, reference_id: reference_id)
  59. transcript_text = truncate_text(transcript[:transcript].to_s, max: TRANSCRIPT_MAX_CHARS)
  60. local_video_intelligence = extract_local_video_intelligence_if_allowed(
  61. bytes: bytes,
  62. reference_id: reference_id,
  63. static_video: static_video
  64. )
  65. static_frame_intelligence = extract_static_frame_intelligence_if_available(
  66. mode: mode,
  67. reference_id: reference_id,
  68. static_video: static_video
  69. )
  # Detections from the static frame come first, followed by those from the local video intelligence.
  70. detections =
  71. detections_from_static_frame_intelligence(static_frame_intelligence: static_frame_intelligence) +
  72. detections_from_local_intelligence(local_video_intelligence: local_video_intelligence)
  73. understanding = @content_understanding_service.build(
  74. media_type: semantic_route,
  75. detections: detections,
  76. transcript_text: transcript_text
  77. )
  78. topics = normalize_string_array(understanding[:topics], limit: TOPIC_LIMIT)
  79. objects = normalize_string_array(understanding[:objects], limit: SIGNAL_LIMIT)
  80. hashtags = normalize_string_array(understanding[:hashtags], limit: SIGNAL_LIMIT)
  81. mentions = normalize_string_array(understanding[:mentions], limit: SIGNAL_LIMIT)
  82. profile_handles = normalize_string_array(understanding[:profile_handles], limit: SIGNAL_LIMIT)
  83. {
  84. skipped: false,
  85. processing_mode: processing_mode,
  86. static: ActiveModel::Type::Boolean.new.cast(mode[:static]) || static_video,
  87. semantic_route: semantic_route,
  88. duration_seconds: duration_seconds,
  89. has_audio: has_audio,
  90. transcript: transcript_text.presence,
  91. topics: topics,
  92. objects: objects,
  93. scenes: normalize_hash_array(understanding[:scenes], limit: SIGNAL_LIMIT),
  94. hashtags: hashtags,
  95. mentions: mentions,
  96. profile_handles: profile_handles,
  97. ocr_text: understanding[:ocr_text].to_s.presence,
  98. ocr_blocks: normalize_hash_array(understanding[:ocr_blocks], limit: SIGNAL_LIMIT),
  99. context_summary: context_summary(
  100. processing_mode: processing_mode,
  101. duration_seconds: duration_seconds,
  102. topics: topics,
  103. transcript: transcript_text
  104. ),
  # Metadata reported by each stage is passed through for diagnostics.
  105. metadata: {
  106. frame_change_detection: mode[:metadata].is_a?(Hash) ? mode[:metadata] : {},
  107. video_probe: probe_metadata,
  108. audio_extraction: audio[:metadata],
  109. transcription: transcript[:metadata],
  110. static_frame_intelligence: static_frame_intelligence[:metadata],
  111. local_video_intelligence: local_video_intelligence[:metadata]
  112. }
  113. }
  # Errors are reported through skipped_result instead of being raised.
  114. rescue StandardError => e
  115. skipped_result(
  116. reason: "video_context_extraction_error",
  117. error_class: e.class.name,
  118. error_message: e.message.to_s
  119. )
  120. end
  121. private
  # Resolves duration and probe metadata for the video.
  #
  # Reuses the probe already attached to `mode` (metadata -> :video_probe) when
  # that entry is a Hash and either `mode[:duration_seconds]` is positive or the
  # Hash is non-empty. Otherwise the raw bytes are handed to
  # @video_metadata_service. Any StandardError is turned into a result with a
  # nil duration and a "video_probe_failed" reason instead of propagating.
  #
  # @param bytes [String] raw video bytes forwarded to the metadata service
  # @param reference_id [#to_s] identifier passed to the service as story_id
  # @param content_type [String, nil] MIME type forwarded to the service
  # @param mode [Hash] classification result; :metadata and :duration_seconds are read
  # @return [Hash] the reused pair, the service's return value, or the failure Hash
  def build_probe(bytes:, reference_id:, content_type:, mode:)
    cached_metadata = mode.dig(:metadata, :video_probe)
    cached_duration = mode[:duration_seconds]
    reusable = cached_metadata.is_a?(Hash) &&
      (cached_duration.to_f.positive? || cached_metadata.present?)

    if reusable
      { duration_seconds: cached_duration, metadata: cached_metadata }
    else
      @video_metadata_service.probe(
        video_bytes: bytes,
        story_id: reference_id.to_s,
        content_type: content_type
      )
    end
  rescue StandardError => error
    failure_details = {
      reason: "video_probe_failed",
      error_class: error.class.name,
      error_message: error.message.to_s
    }
    { duration_seconds: nil, metadata: failure_details }
  end
  # Extracts the audio track unless one of the guards rules it out.
  #
  # Guards, checked in order: no audio stream, payload larger than
  # MAX_AUDIO_EXTRACTION_BYTES, or a positive duration above
  # MAX_AUDIO_DURATION_SECONDS. Each guard (and any StandardError) yields an
  # `empty_audio` result carrying the matching reason.
  #
  # @param bytes [String] raw video bytes
  # @param reference_id [#to_s] identifier passed to the service as story_id
  # @param content_type [String, nil] MIME type forwarded to the service
  # @param duration_seconds [Numeric, nil] probed duration; non-positive values skip the length guard
  # @param has_audio [Boolean] whether an audio stream was detected
  # @return [Hash] the extraction service's result or an `empty_audio` Hash
  def extract_audio_if_allowed(bytes:, reference_id:, content_type:, duration_seconds:, has_audio:)
    return empty_audio(reason: "no_audio_stream") unless has_audio
    return empty_audio(reason: "video_too_large_for_audio_extraction") if bytes.bytesize > MAX_AUDIO_EXTRACTION_BYTES

    seconds = duration_seconds.to_f
    too_long = seconds.positive? && seconds > MAX_AUDIO_DURATION_SECONDS
    return empty_audio(reason: "video_too_long_for_audio_extraction") if too_long

    @video_audio_extraction_service.extract(
      video_bytes: bytes,
      story_id: reference_id.to_s,
      content_type: content_type
    )
  rescue StandardError => error
    empty_audio(
      reason: "audio_extraction_error",
      error_class: error.class.name,
      error_message: error.message.to_s
    )
  end
  # Transcribes the extracted audio when there is any.
  #
  # Blank audio bytes short-circuit to an "audio_unavailable" empty transcript;
  # a StandardError from anywhere in the method becomes a
  # "transcription_error" empty transcript.
  #
  # @param audio [Hash] audio extraction result; only :audio_bytes is read
  # @param reference_id [#to_s] identifier passed to the service as story_id
  # @return [Hash] the transcription service's result or an `empty_transcript` Hash
  def transcribe_audio_if_available(audio:, reference_id:)
    raw_audio = audio[:audio_bytes].to_s.b

    if raw_audio.blank?
      empty_transcript(reason: "audio_unavailable")
    else
      @speech_transcription_service.transcribe(
        audio_bytes: raw_audio,
        story_id: reference_id.to_s
      )
    end
  rescue StandardError => error
    empty_transcript(
      reason: "transcription_error",
      error_class: error.class.name,
      error_message: error.message.to_s
    )
  end
  # Runs the local microservice's video analysis for dynamic videos.
  #
  # Static videos and payloads above MAX_DYNAMIC_INTELLIGENCE_BYTES are not
  # sent; they return empty data with an explanatory reason. A non-Hash
  # response is treated as empty data, and a StandardError is reported as
  # "dynamic_intelligence_error" instead of propagating.
  #
  # @param bytes [String] raw video bytes
  # @param reference_id [#to_s] identifier included in the usage context
  # @param static_video [Boolean] true when the video was classified as static
  # @return [Hash] { data: Hash, metadata: Hash }
  def extract_local_video_intelligence_if_allowed(bytes:, reference_id:, static_video:)
    return { data: {}, metadata: { reason: "static_video_routed_to_image" } } if static_video

    if bytes.bytesize > MAX_DYNAMIC_INTELLIGENCE_BYTES
      return { data: {}, metadata: { reason: "video_too_large_for_dynamic_intelligence" } }
    end

    usage_context = {
      workflow: "post_analysis_pipeline",
      task: "video_context",
      reference_id: reference_id.to_s
    }
    response = @local_microservice_client.analyze_video_story_intelligence!(
      video_bytes: bytes,
      usage_context: usage_context
    )

    # Anything other than a Hash is discarded; metadata is only kept when it is a Hash too.
    payload = response.is_a?(Hash) ? response : {}
    response_metadata = payload["metadata"]
    { data: payload, metadata: response_metadata.is_a?(Hash) ? response_metadata : {} }
  rescue StandardError => error
    failure_details = {
      reason: "dynamic_intelligence_error",
      error_class: error.class.name,
      error_message: error.message.to_s
    }
    { data: {}, metadata: failure_details }
  end
  # Runs face/OCR detection on the representative frame of a static video.
  #
  # Dynamic videos and missing frame bytes return empty data with a reason.
  # A non-Hash response is treated as empty data, and a StandardError is
  # reported as "static_frame_intelligence_error" instead of propagating.
  #
  # @param mode [Hash] classification result; only :frame_bytes is read
  # @param reference_id [#to_s] identifier included in the usage context
  # @param static_video [Boolean] true when the video was classified as static
  # @return [Hash] { data: Hash, metadata: Hash }
  def extract_static_frame_intelligence_if_available(mode:, reference_id:, static_video:)
    return { data: {}, metadata: { reason: "dynamic_video_no_static_frame_analysis" } } unless static_video

    still_frame = mode[:frame_bytes].to_s.b
    return { data: {}, metadata: { reason: "static_frame_missing" } } if still_frame.blank?

    usage_context = {
      workflow: "post_analysis_pipeline",
      task: "video_static_frame_context",
      reference_id: reference_id.to_s
    }
    response = @local_microservice_client.detect_faces_and_ocr!(
      image_bytes: still_frame,
      usage_context: usage_context
    )

    # Anything other than a Hash is discarded; metadata is only kept when it is a Hash too.
    payload = response.is_a?(Hash) ? response : {}
    response_metadata = payload["metadata"]
    { data: payload, metadata: response_metadata.is_a?(Hash) ? response_metadata : {} }
  rescue StandardError => error
    failure_details = {
      reason: "static_frame_intelligence_error",
      error_class: error.class.name,
      error_message: error.message.to_s
    }
    { data: {}, metadata: failure_details }
  end
  # Converts static-frame analysis data into a single detection row.
  #
  # Hash-valued lists keep only Hash entries; label-style lists are coerced to
  # strings. Returns no rows when the data is missing, not a Hash, or empty.
  #
  # @param static_frame_intelligence [Hash] result whose :data entry holds string-keyed analysis output
  # @return [Array<Hash>] zero or one detection Hash
  def detections_from_static_frame_intelligence(static_frame_intelligence:)
    raw = static_frame_intelligence[:data]
    data = raw.is_a?(Hash) ? raw : {}
    return [] if data.empty?

    hash_rows = ->(key) { Array(data[key]).grep(Hash) }
    strings = ->(key) { Array(data[key]).map(&:to_s) }

    detection = {
      faces: hash_rows.call("faces"),
      content_signals: strings.call("content_labels"),
      object_detections: hash_rows.call("object_detections"),
      scenes: hash_rows.call("scenes"),
      location_tags: strings.call("location_tags"),
      ocr_text: data["ocr_text"].to_s,
      ocr_blocks: hash_rows.call("ocr_blocks"),
      mentions: strings.call("mentions"),
      hashtags: strings.call("hashtags"),
      profile_handles: strings.call("profile_handles")
    }
    [ detection ]
  end
  # Converts dynamic-video analysis data into a single detection row.
  #
  # Hash-valued lists keep only Hash entries; label-style lists are coerced to
  # strings. Returns no rows when the data is missing, not a Hash, or empty.
  #
  # @param local_video_intelligence [Hash] result whose :data entry holds string-keyed analysis output
  # @return [Array<Hash>] zero or one detection Hash
  def detections_from_local_intelligence(local_video_intelligence:)
    raw = local_video_intelligence[:data]
    data = raw.is_a?(Hash) ? raw : {}
    return [] if data.empty?

    hash_rows = ->(key) { Array(data[key]).grep(Hash) }
    strings = ->(key) { Array(data[key]).map(&:to_s) }

    detection = {
      content_signals: strings.call("content_labels"),
      object_detections: hash_rows.call("object_detections"),
      scenes: hash_rows.call("scenes"),
      ocr_text: data["ocr_text"].to_s,
      ocr_blocks: hash_rows.call("ocr_blocks"),
      mentions: strings.call("mentions"),
      hashtags: strings.call("hashtags"),
      profile_handles: strings.call("profile_handles")
    }
    [ detection ]
  end
  # Builds a short human-readable summary from whichever signals are present.
  #
  # Sentences are appended in a fixed order (static routing note, duration,
  # up to six topics, transcript truncated to 140 characters) and joined with
  # single spaces.
  #
  # @param processing_mode [#to_s] "static_image" adds the static routing note
  # @param duration_seconds [Numeric, nil] only positive values are mentioned
  # @param topics [Array<String>] topic labels
  # @param transcript [String, nil] transcript text
  # @return [String, nil] the summary, or nil when nothing was added
  def context_summary(processing_mode:, duration_seconds:, topics:, transcript:)
    sentences = []
    if processing_mode.to_s == "static_image"
      sentences << "Static visual video detected and routed through image-style analysis."
    end
    sentences << "Duration #{duration_seconds.to_f.round(2)}s." if duration_seconds.to_f.positive?
    sentences << "Topics: #{topics.first(6).join(', ')}." if topics.any?
    sentences << "Audio transcript: #{truncate_text(transcript, max: 140)}." if transcript.to_s.present?

    sentences.join(" ").strip.presence
  end
  # Coerces the input to a list of trimmed, non-blank, unique strings.
  #
  # @param values [Object] anything `Array()` accepts (nil becomes an empty list)
  # @param limit [Integer] maximum number of entries returned
  # @return [Array<String>] at most `limit` strings, in first-seen order
  def normalize_string_array(values, limit:)
    trimmed = Array(values).map { |value| value.to_s.strip }
    trimmed.reject(&:blank?).uniq.first(limit)
  end
  # Keeps only the Hash entries of the input, capped at `limit`.
  #
  # @param values [Object] anything `Array()` accepts (nil becomes an empty list)
  # @param limit [Integer] maximum number of entries returned
  # @return [Array<Hash>] at most `limit` Hash entries, in original order
  def normalize_hash_array(values, limit:)
    hashes_only = Array(values).grep(Hash)
    hashes_only.first(limit)
  end
  # Trims the value and shortens it to at most `max` characters, appending
  # "..." when anything was cut off.
  #
  # The cut is made on character boundaries so it agrees with the
  # character-based length check; a byte-based cut would shorten multi-byte
  # text too much or split a character and leave an invalid string.
  #
  # @param value [#to_s] text to shorten (nil becomes "")
  # @param max [Integer] maximum number of characters kept before the ellipsis
  # @return [String] the trimmed text, shortened with a trailing "..." if needed
  def truncate_text(value, max:)
    text = value.to_s.strip
    return text if text.length <= max

    "#{text[0, max]}..."
  end
  # Builds the placeholder returned when no audio could be extracted.
  #
  # @param reason [#to_s] machine-readable reason stored in the metadata
  # @param error_class [#to_s, nil] exception class name; omitted from metadata when blank
  # @param error_message [#to_s, nil] exception message; omitted from metadata when blank
  # @return [Hash] { audio_bytes: nil, content_type: nil, metadata: Hash }
  def empty_audio(reason:, error_class: nil, error_message: nil)
    details = { source: "video_audio_extraction", reason: reason.to_s }
    details[:error_class] = error_class.to_s.presence
    details[:error_message] = error_message.to_s.presence

    { audio_bytes: nil, content_type: nil, metadata: details.compact }
  end
  # Builds the placeholder returned when no transcript could be produced.
  #
  # @param reason [#to_s] machine-readable reason stored in the metadata
  # @param error_class [#to_s, nil] exception class name; omitted from metadata when blank
  # @param error_message [#to_s, nil] exception message; omitted from metadata when blank
  # @return [Hash] { transcript: nil, metadata: Hash }
  def empty_transcript(reason:, error_class: nil, error_message: nil)
    details = { source: "speech_transcription", reason: reason.to_s }
    details[:error_class] = error_class.to_s.presence
    details[:error_message] = error_message.to_s.presence

    { transcript: nil, metadata: details.compact }
  end
  # Builds the result returned when video context extraction was skipped or
  # failed: every signal is empty and the metadata explains why.
  #
  # @param reason [#to_s] machine-readable reason stored in the metadata
  # @param byte_size [Integer, nil] payload size; omitted from metadata when nil
  # @param max_bytes [Integer, nil] size limit; omitted from metadata when nil
  # @param error_class [#to_s, nil] exception class name; omitted when blank
  # @param error_message [#to_s, nil] exception message; omitted when blank
  # @return [Hash] result with skipped: true and empty signal fields
  def skipped_result(reason:, byte_size: nil, max_bytes: nil, error_class: nil, error_message: nil)
    details = {
      reason: reason.to_s,
      byte_size: byte_size,
      max_bytes: max_bytes,
      error_class: error_class.to_s.presence,
      error_message: error_message.to_s.presence
    }

    result = { skipped: true, processing_mode: "dynamic_video", static: false, semantic_route: "video" }
    result.merge!(duration_seconds: nil, has_audio: nil, transcript: nil)
    result.merge!(topics: [], objects: [], scenes: [], hashtags: [], mentions: [], profile_handles: [])
    result.merge!(ocr_text: nil, ocr_blocks: [], context_summary: nil)
    result[:metadata] = details.compact
    result
  end
  356. end

app/services/response_generation_service.rb

0.0% lines covered

100.0% branches covered

49 relevant lines. 0 lines covered and 49 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Produces short canned reply suggestions for a piece of content.
  #
  # A template set is chosen from the persona tone and the content sentiment,
  # and the "{topic}" placeholder is filled with the first detected topic
  # (or removed when there is none).
  class ResponseGenerationService
    # @param personalization_engine [#build] object returning a persona Hash with a :tone entry
    def initialize(personalization_engine: PersonalizationEngine.new)
      @personalization_engine = personalization_engine
    end

    # @param profile [Object] profile handed to the personalization engine
    # @param content_understanding [Hash] only :topics and :sentiment are read
    # @param max_suggestions [#to_i] requested count, clamped to 1..10
    # @return [Array<String>] unique, stripped suggestions
    def generate(profile:, content_understanding:, max_suggestions: 5)
      persona = @personalization_engine.build(profile: profile)
      # The same topic is used for every template, so look it up once.
      topic = Array(content_understanding[:topics]).first
      sentiment = content_understanding[:sentiment].to_s

      suggestions = base_templates(tone: persona[:tone], sentiment: sentiment).map do |template|
        fill_topic(template, topic).strip
      end
      suggestions.uniq.first(max_suggestions.to_i.clamp(1, 10))
    end

    private

    # Substitutes the topic into the template, or drops the placeholder (and
    # the space before it) when no topic is available.
    def fill_topic(template, topic)
      return template.gsub(" {topic}", "") unless topic.present?

      # Block form inserts the topic literally: a plain replacement string
      # would interpret sequences such as "\0" as backreferences. `to_s`
      # keeps non-String topics (e.g. Symbols) from raising.
      template.gsub("{topic}") { topic.to_s }
    end

    # Empathetic wins over optimistic when both tone and sentiment apply.
    def base_templates(tone:, sentiment:)
      return empathetic_templates if tone == "empathetic" || sentiment == "negative"
      return optimistic_templates if tone == "optimistic" || sentiment == "positive"

      neutral_templates
    end

    def optimistic_templates
      [
        "Love this energy around {topic}.",
        "This looks amazing, especially the {topic} moment.",
        "Big fan of this one, great vibe.",
        "This is strong content. Keep it coming.",
        "Great share, this feels really authentic."
      ]
    end

    def empathetic_templates
      [
        "Appreciate you sharing this.",
        "Sending support your way.",
        "This felt real and honest.",
        "Thanks for posting this perspective.",
        "Rooting for you."
      ]
    end

    def neutral_templates
      [
        "Nice story update.",
        "This was a good share.",
        "Loved the {topic} angle here.",
        "Clean and engaging post.",
        "Great context in this one."
      ]
    end
  end

app/services/speech_transcription_service.rb

0.0% lines covered

100.0% branches covered

112 relevant lines. 0 lines covered and 112 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "open3"
  2. require "shellwords"
  3. require "tempfile"
  4. require "tmpdir"
  5. require "net/http"
  6. require "json"
  # Transcribes audio bytes, trying the local AI microservice first (when
  # enabled) and falling back to the Whisper command-line binary.
  #
  # Every failure path returns a Hash with a nil :transcript and a :metadata
  # entry naming the reason; `transcribe` does not raise StandardError.
  class SpeechTranscriptionService
    # @param whisper_bin [#to_s] Whisper executable name or path
    # @param whisper_model [#to_s] model name passed to both backends
    # @param use_microservice [Boolean] whether to try the HTTP microservice first
    def initialize(whisper_bin: ENV.fetch("WHISPER_BIN", "whisper"), whisper_model: ENV.fetch("WHISPER_MODEL", "base"), use_microservice: ENV.fetch("USE_LOCAL_AI_MICROSERVICE", "true") == "true")
      @whisper_bin = whisper_bin.to_s
      @whisper_model = whisper_model.to_s
      @use_microservice = use_microservice
      @microservice_url = ENV.fetch("LOCAL_AI_SERVICE_URL", "http://localhost:8000")
    end

    # @param audio_bytes [String, nil] raw audio data
    # @param story_id [#to_s] identifier used in temporary file names
    # @return [Hash] { transcript: String or nil, metadata: Hash }
    def transcribe(audio_bytes:, story_id:)
      return empty_result("audio_bytes_missing") if audio_bytes.blank?

      # Try microservice first if enabled
      if @use_microservice
        microservice_result = transcribe_with_microservice(audio_bytes, story_id)
        return microservice_result if microservice_result[:transcript].present?
      end

      # Fallback to local Whisper binary
      return empty_result("whisper_missing") unless command_available?(@whisper_bin)

      transcribe_with_binary(audio_bytes, story_id)
    rescue StandardError => e
      empty_result("transcription_error", stderr: e.message)
    end

    private

    # Posts the audio to the microservice as multipart form data. The bytes
    # are sent straight from memory, so no temporary file is needed here.
    def transcribe_with_microservice(audio_bytes, _story_id)
      uri = URI.parse("#{@microservice_url}/transcribe/audio")
      boundary = "----WebKitFormBoundary#{SecureRandom.hex(16)}"

      http = Net::HTTP.new(uri.host, uri.port)
      # Net::HTTP.new never enables TLS on its own; without this an https://
      # service URL would be spoken to in plain HTTP.
      http.use_ssl = uri.scheme == "https"
      http.open_timeout = 30
      http.read_timeout = 120

      request = Net::HTTP::Post.new(uri.request_uri)
      request["Content-Type"] = "multipart/form-data; boundary=#{boundary}"
      request["Accept"] = "application/json"
      request.body = multipart_body(boundary: boundary, audio_bytes: audio_bytes)

      response = http.request(request)
      body = JSON.parse(response.body.to_s.presence || "{}")
      body = {} unless body.is_a?(Hash)

      if response.is_a?(Net::HTTPSuccess) && body["success"]
        {
          transcript: body["transcript"],
          metadata: {
            source: "local_microservice",
            model: @whisper_model,
            confidence: body.dig("metadata", "confidence")
          }
        }
      else
        empty_result("microservice_error", stderr: body["error"])
      end
    rescue StandardError => e
      empty_result("microservice_error", stderr: e.message)
    end

    # Assembles the multipart payload ("file" and "model" fields). Every part
    # is converted to a binary string first so raw audio bytes and a
    # non-ASCII model name can be joined without an encoding error.
    def multipart_body(boundary:, audio_bytes:)
      parts = [
        "--#{boundary}\r\n",
        "Content-Disposition: form-data; name=\"file\"; filename=\"audio.wav\"\r\n",
        "Content-Type: application/octet-stream\r\n\r\n",
        audio_bytes,
        "\r\n",
        "--#{boundary}\r\n",
        "Content-Disposition: form-data; name=\"model\"\r\n\r\n",
        @whisper_model,
        "\r\n",
        "--#{boundary}--\r\n"
      ]
      parts.map { |part| part.to_s.b }.join
    end

    # Writes the audio to a temporary .wav file, runs the Whisper binary into
    # a temporary output directory and reads back the first .txt it produced.
    def transcribe_with_binary(audio_bytes, story_id)
      Tempfile.create([ "story_audio_#{story_id}", ".wav" ]) do |audio_file|
        audio_file.binmode
        audio_file.write(audio_bytes)
        audio_file.flush

        Dir.mktmpdir("story_whisper_#{story_id}_") do |output_dir|
          cmd = [
            @whisper_bin,
            audio_file.path,
            "--model", @whisper_model,
            "--output_format", "txt",
            "--output_dir", output_dir,
            "--task", "transcribe"
          ]
          _stdout, stderr, status = Open3.capture3(*cmd)
          return empty_result("whisper_failed", stderr: stderr.to_s) unless status.success?

          txt_path = Dir[File.join(output_dir, "*.txt")].first
          return empty_result("transcript_missing") if txt_path.blank?

          text = File.read(txt_path).to_s.strip
          return empty_result("transcript_empty") if text.blank?

          {
            transcript: text,
            metadata: {
              source: "local_whisper_binary",
              model: @whisper_model
            }
          }
        end
      end
    end

    # True when the shell can resolve the command name.
    def command_available?(command)
      system("command -v #{Shellwords.escape(command)} >/dev/null 2>&1")
    end

    # Result shape shared by every failure path; blank stderr is omitted.
    def empty_result(reason, stderr: nil)
      {
        transcript: nil,
        metadata: {
          source: "local_whisper",
          reason: reason,
          stderr: stderr.to_s.presence
        }.compact
      }
    end
  end

app/services/story_archive/media_preview_resolver.rb

0.0% lines covered

100.0% branches covered

37 relevant lines. 0 lines covered and 37 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  module StoryArchive
    # Class-level helpers that decide how an archived story's media should be
    # previewed, based on the event and its string-keyed metadata Hash.
    class MediaPreviewResolver
      class << self
        # True when any of three metadata markers says the video was handled
        # as a single static image.
        #
        # @param metadata [Hash, Object] non-Hash values are treated as empty
        # @return [Boolean]
        def static_video_preview?(metadata:)
          root = metadata_hash(metadata)
          processing = metadata_hash(root["processing_metadata"])
          return true if processing["source"].to_s == "video_static_single_frame"

          frame_change = metadata_hash(processing["frame_change_detection"])
          return true if frame_change["processing_mode"].to_s == "static_image"

          local_intelligence = metadata_hash(root["local_story_intelligence"])
          local_intelligence["video_processing_mode"].to_s == "static_image"
        end

        # Prefers the event's attached preview image and falls back to an
        # image URL found in the metadata.
        #
        # @return [String, nil]
        def preferred_preview_image_url(event:, metadata:)
          attached = preview_image_path(event)
          attached || metadata_preview_image_url(metadata: metadata)
        end

        # Returns the top-level "image_url", or else the first non-blank
        # "image_url" among the Hash entries of "carousel_media".
        #
        # @param metadata [Hash, Object] non-Hash values are treated as empty
        # @return [String, nil]
        def metadata_preview_image_url(metadata:)
          root = metadata_hash(metadata)
          top_level = root["image_url"].to_s
          return top_level if top_level.present?

          Array(root["carousel_media"]).each do |entry|
            next unless entry.is_a?(Hash)

            url = entry["image_url"].to_s
            return url if url.present?
          end
          nil
        end

        private

        # Blob path of the event's attached preview image; nil when the event
        # has no such attachment or anything goes wrong while resolving it.
        def preview_image_path(event)
          return nil unless event.respond_to?(:preview_image)
          return nil unless event.preview_image.attached?

          Rails.application.routes.url_helpers.rails_blob_path(event.preview_image, only_path: true)
        rescue StandardError
          nil
        end

        # Hashes pass through unchanged; everything else becomes an empty Hash.
        def metadata_hash(value)
          return value if value.is_a?(Hash)

          {}
        end
      end
    end
  end

app/services/story_content_understanding_service.rb

0.0% lines covered

100.0% branches covered

57 relevant lines. 0 lines covered and 57 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Aggregates per-frame detection rows and an optional transcript into one
  # content summary: collected signals, a keyword-based sentiment and a topic
  # list.
  class StoryContentUnderstandingService
    POSITIVE_TERMS = %w[happy great love awesome excited win winning strong proud blessed amazing].freeze
    NEGATIVE_TERMS = %w[sad angry upset bad pain tired depressed sick fail failing stressed].freeze
    STOPWORDS = %w[the a an and or to of in on at for with is are this that from your my our they].freeze

    # @param media_type [#to_s] echoed back as :media_type
    # @param detections [Array<Hash>, Object] symbol-keyed detection rows (coerced with `Array()`)
    # @param transcript_text [String, nil] audio transcript, if any
    # @return [Hash] summary with capped lists (objects 60, locations 30, topics 30, mentions/hashtags/handles 40, scenes 80, ocr_blocks and object_detections 120)
    def build(media_type:, detections:, transcript_text: nil)
      rows = Array(detections)

      ocr_text = rows.map { |row| row[:ocr_text].to_s.strip }.reject(&:blank?).uniq.join("\n").strip.presence
      objects = stripped_strings(rows, :content_signals)
      hashtags = lowercased_strings(rows, :hashtags)
      sentiment = infer_sentiment([ ocr_text, transcript_text.to_s ].compact.join("\n"))
      topics = infer_topics(objects: objects, hashtags: hashtags, transcript: transcript_text, ocr_text: ocr_text)

      {
        objects: objects.first(60),
        faces: rows.sum { |row| Array(row[:faces]).length },
        locations: stripped_strings(rows, :location_tags).uniq.first(30),
        ocr_text: ocr_text,
        ocr_blocks: hash_entries(rows, :ocr_blocks).first(120),
        transcript: transcript_text.to_s.presence,
        sentiment: sentiment,
        topics: topics.first(30),
        mentions: lowercased_strings(rows, :mentions).first(40),
        hashtags: hashtags.first(40),
        profile_handles: lowercased_strings(rows, :profile_handles).first(40),
        scenes: hash_entries(rows, :scenes).first(80),
        object_detections: hash_entries(rows, :object_detections).first(120),
        media_type: media_type.to_s
      }
    end

    private

    # All values under `key` across rows, as trimmed non-blank strings (duplicates kept).
    def stripped_strings(rows, key)
      rows.flat_map { |row| Array(row[key]) }.map { |value| value.to_s.strip }.reject(&:blank?)
    end

    # All values under `key` across rows, as unique lower-cased strings.
    def lowercased_strings(rows, key)
      rows.flat_map { |row| Array(row[key]) }.map { |value| value.to_s.downcase }.uniq
    end

    # All Hash values under `key` across rows.
    def hash_entries(rows, key)
      rows.flat_map { |row| Array(row[key]) }.grep(Hash)
    end

    # "positive"/"negative" when one term list has more hits among the
    # tokens than the other; "neutral" on a tie or when there are no tokens.
    def infer_sentiment(text)
      tokens = tokenize(text)
      return "neutral" if tokens.empty?

      positive_hits = tokens.count { |token| POSITIVE_TERMS.include?(token) }
      negative_hits = tokens.count { |token| NEGATIVE_TERMS.include?(token) }

      case positive_hits <=> negative_hits
      when 1 then "positive"
      when -1 then "negative"
      else "neutral"
      end
    end

    # Unique topics from lower-cased labels, hashtags without their leading
    # "#", and the non-stopword tokens of transcript plus OCR text.
    def infer_topics(objects:, hashtags:, transcript:, ocr_text:)
      label_topics = objects.map(&:downcase)
      hashtag_topics = hashtags.map { |tag| tag.to_s.sub(/^#/, "") }.reject(&:blank?)
      text_topics = tokenize([ transcript, ocr_text ].join(" ")).reject { |token| STOPWORDS.include?(token) }

      [ *label_topics, *hashtag_topics, *text_topics ].reject(&:blank?).uniq
    end

    # Lower-cased runs of ASCII letters, digits and underscores.
    def tokenize(text)
      text.to_s.downcase.scan(/[a-z0-9_]+/)
    end
  end

app/services/story_ingestion_service.rb

0.0% lines covered

100.0% branches covered

83 relevant lines. 0 lines covered and 83 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Creates or updates the InstagramStory record for a fetched story, attaches
  # its media once, and optionally enqueues processing.
  class StoryIngestionService
    STORY_DURATION_KEYS = %i[duration_seconds duration video_duration].freeze

    # @param account [Object] assigned to the record as instagram_account
    # @param profile [Object] used together with the story id to find the record
    # @param enqueue_processing [Boolean] when false, no processing job is enqueued
    def initialize(account:, profile:, enqueue_processing: true)
      @account = account
      @profile = profile
      @enqueue_processing = enqueue_processing
    end

    # @param story [Hash] symbol-keyed story payload; :story_id is required
    # @param source_event [Object, nil] assigned to the record when present
    # @param bytes [String, nil] media bytes to attach
    # @param content_type [String, nil] MIME type of the media
    # @param filename [String, nil] file name for the attachment
    # @param force_reprocess [Boolean] resets the processing state of an existing record
    # @return [InstagramStory] the saved record
    # @raise [ArgumentError] when the story id is blank
    def ingest!(story:, source_event: nil, bytes: nil, content_type: nil, filename: nil, force_reprocess: false)
      story_id = story[:story_id].to_s.strip
      raise ArgumentError, "story_id is required" if story_id.blank?

      record = InstagramStory.find_or_initialize_by(instagram_profile: @profile, story_id: story_id)
      # Captured before saving: it records whether the story was already stored.
      already_stored = record.persisted?

      copy_story_fields(record: record, story: story, source_event: source_event, content_type: content_type)
      record.metadata = merged_metadata(
        existing: record.metadata,
        story: story,
        filename: filename,
        content_type: content_type,
        media_bytes: bytes&.bytesize,
        existing_story_record: already_stored
      )
      mark_pending(record) if record.new_record? || force_reprocess

      record.save!
      attach_media!(record: record, bytes: bytes, content_type: content_type, filename: filename) if bytes.present?
      enqueue_processing!(record: record, force_reprocess: force_reprocess)
      record
    end

    private

    # Copies the payload's descriptive fields onto the record. Timestamps and
    # the source event are only overwritten when a value is present.
    def copy_story_fields(record:, story:, source_event:, content_type:)
      record.instagram_account = @account
      record.source_event = source_event if source_event.present?
      record.media_type = story[:media_type].to_s.presence || infer_media_type(content_type: content_type)
      record.media_url = story[:media_url].to_s.presence
      record.image_url = story[:image_url].to_s.presence
      record.video_url = story[:video_url].to_s.presence
      record.taken_at = story[:taken_at] if story[:taken_at].present?
      record.expires_at = story[:expiring_at] if story[:expiring_at].present?
      record.duration_seconds = extract_duration_seconds(story: story, current: record.duration_seconds)
    end

    # Resets the record so it is picked up for processing again.
    def mark_pending(record)
      record.processed = false
      record.processing_status = "pending"
      record.processed_at = nil
    end

    # "video" or "image" from the MIME type prefix; nil for anything else.
    def infer_media_type(content_type:)
      case content_type.to_s.downcase
      when %r{\Avideo/} then "video"
      when %r{\Aimage/} then "image"
      end
    end

    # Existing metadata (when it is a Hash) overlaid with this ingestion's details.
    def merged_metadata(existing:, story:, filename:, content_type:, media_bytes:, existing_story_record:)
      base = existing.is_a?(Hash) ? existing : {}
      ingestion_details = {
        "story_payload" => {
          "caption" => story[:caption].to_s,
          "permalink" => story[:permalink].to_s
        },
        "media_filename" => filename.to_s,
        "media_content_type" => content_type.to_s,
        "media_bytes" => media_bytes.to_i,
        "duplicate_story_storage_prevented" => ActiveModel::Type::Boolean.new.cast(existing_story_record),
        "ingested_at" => Time.current.iso8601
      }
      base.merge(ingestion_details)
    end

    # First positive duration among the payload keys and the current value,
    # rounded to two decimals; nil when none is positive.
    def extract_duration_seconds(story:, current:)
      candidates = STORY_DURATION_KEYS.map { |key| story[key] }.push(current)
      first_positive = candidates.compact.map(&:to_f).find(&:positive?)
      first_positive&.round(2)
    end

    # Attaches the media unless something is already attached. Failures are
    # swallowed on purpose, so the caller continues without an attachment.
    def attach_media!(record:, bytes:, content_type:, filename:)
      return if record.media.attached?

      attachment_name = filename.to_s.presence || "story_#{record.story_id.parameterize}.bin"
      attachment_type = content_type.to_s.presence || "application/octet-stream"
      record.media.attach(io: StringIO.new(bytes), filename: attachment_name, content_type: attachment_type)
    rescue StandardError
      nil
    end

    # Enqueues the processing job unless enqueueing is disabled, the record
    # is currently processing, or it is already processed and not forced.
    def enqueue_processing!(record:, force_reprocess:)
      return unless @enqueue_processing
      return if record.processing_status == "processing"
      return if record.processed? && !force_reprocess

      StoryProcessingJob.perform_later(instagram_story_id: record.id, force: force_reprocess)
    end
  end

app/services/story_processing_service.rb

0.0% lines covered

100.0% branches covered

433 relevant lines. 0 lines covered and 433 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "json"
  2. require "net/http"
  3. require "uri"
  4. class StoryProcessingService
  5. DEFAULT_MATCH_MIN_CONFIDENCE = ENV.fetch("STORY_FACE_MATCH_MIN_CONFIDENCE", "0.78").to_f
  6. def initialize(
  7. story:,
  8. force: false,
  9. face_detection_service: FaceDetectionService.new,
  10. face_embedding_service: FaceEmbeddingService.new,
  11. vector_matching_service: VectorMatchingService.new,
  12. user_profile_builder_service: UserProfileBuilderService.new,
  13. video_frame_extraction_service: VideoFrameExtractionService.new,
  14. video_audio_extraction_service: VideoAudioExtractionService.new,
  15. speech_transcription_service: SpeechTranscriptionService.new,
  16. video_metadata_service: VideoMetadataService.new,
  17. video_frame_change_detector_service: VideoFrameChangeDetectorService.new,
  18. content_understanding_service: StoryContentUnderstandingService.new,
  19. response_generation_service: ResponseGenerationService.new,
  20. face_identity_resolution_service: FaceIdentityResolutionService.new,
  21. match_min_confidence: nil
  22. )
  23. @story = story
  24. @force = ActiveModel::Type::Boolean.new.cast(force)
  25. @face_detection_service = face_detection_service
  26. @face_embedding_service = face_embedding_service
  27. @vector_matching_service = vector_matching_service
  28. @user_profile_builder_service = user_profile_builder_service
  29. @video_frame_extraction_service = video_frame_extraction_service
  30. @video_audio_extraction_service = video_audio_extraction_service
  31. @speech_transcription_service = speech_transcription_service
  32. @video_metadata_service = video_metadata_service
  33. @video_frame_change_detector_service = video_frame_change_detector_service
  34. @content_understanding_service = content_understanding_service
  35. @response_generation_service = response_generation_service
  36. @face_identity_resolution_service = face_identity_resolution_service
  37. @match_min_confidence = begin
  38. value = match_min_confidence.nil? ? DEFAULT_MATCH_MIN_CONFIDENCE : match_min_confidence.to_f
  39. value.negative? ? DEFAULT_MATCH_MIN_CONFIDENCE : value
  40. rescue StandardError
  41. DEFAULT_MATCH_MIN_CONFIDENCE
  42. end
  43. end
  44. def process!
  45. return @story if @story.processed? && !@force
  46. @story.update!(processing_status: "processing", processed: false)
  47. @story.instagram_story_faces.delete_all if @force
  48. media_payload = load_media_payload
  49. result =
  50. if media_payload[:media_type] == "video"
  51. process_video_story(media_payload)
  52. else
  53. process_image_story(media_payload)
  54. end
  55. persist_faces!(detected_faces: result[:faces], story_id: media_payload[:story_id], fallback_image_bytes: media_payload[:image_bytes])
  56. linked_face_count = @story.instagram_story_faces.where.not(instagram_story_person_id: nil).count
  57. unlinked_face_count = @story.instagram_story_faces.where(instagram_story_person_id: nil).count
  58. content_understanding = @content_understanding_service.build(
  59. media_type: media_payload[:media_type],
  60. detections: result[:detections],
  61. transcript_text: result[:transcript_text]
  62. )
  63. suggestions = @response_generation_service.generate(
  64. profile: @story.instagram_profile,
  65. content_understanding: content_understanding
  66. )
  67. metadata = (@story.metadata.is_a?(Hash) ? @story.metadata : {}).merge(
  68. "ocr_text" => content_understanding[:ocr_text].to_s,
  69. "location_tags" => Array(content_understanding[:locations]).uniq,
  70. "content_signals" => Array(content_understanding[:objects]).uniq,
  71. "mentions" => Array(content_understanding[:mentions]).uniq,
  72. "hashtags" => Array(content_understanding[:hashtags]).uniq,
  73. "transcript" => content_understanding[:transcript].to_s.presence,
  74. "face_count" => result[:faces].length,
  75. "linked_face_count" => linked_face_count,
  76. "unlinked_face_count" => unlinked_face_count,
  77. "min_match_confidence" => @match_min_confidence.round(3),
  78. "processing_path" => media_payload[:media_type],
  79. "generated_response_suggestions" => suggestions,
  80. "content_understanding" => content_understanding,
  81. "last_processed_at" => Time.current.iso8601,
  82. "pipeline_version" => "story_processing_v2",
  83. "processing_metadata" => result[:processing_metadata]
  84. )
  85. @story.update!(
  86. processed: true,
  87. processing_status: "processed",
  88. processed_at: Time.current,
  89. duration_seconds: result[:duration_seconds] || @story.duration_seconds,
  90. metadata: metadata
  91. )
  92. identity_resolution = @face_identity_resolution_service.resolve_for_story!(
  93. story: @story,
  94. extracted_usernames: (
  95. Array(content_understanding[:mentions]) +
  96. Array(content_understanding[:profile_handles]) +
  97. content_understanding[:ocr_text].to_s.scan(/@[a-zA-Z0-9._]{2,30}/)
  98. ),
  99. content_summary: content_understanding
  100. )
  101. if identity_resolution.is_a?(Hash) && identity_resolution[:summary].is_a?(Hash)
  102. story_meta = @story.metadata.is_a?(Hash) ? @story.metadata.deep_dup : {}
  103. story_meta["face_identity"] = identity_resolution[:summary]
  104. story_meta["participant_summary"] = identity_resolution[:summary][:participant_summary_text].to_s
  105. @story.update!(metadata: story_meta)
  106. end
  107. InstagramProfileEvent.broadcast_story_archive_refresh!(account: @story.instagram_account)
  108. @user_profile_builder_service.refresh!(profile: @story.instagram_profile)
  109. @story
  110. rescue StandardError => e
  111. fail_story!(error_message: e.message)
  112. raise
  113. end
  114. private
  115. def process_image_story(media_payload)
  116. detection = @face_detection_service.detect(media_payload: media_payload)
  117. faces = Array(detection[:faces]).map do |face|
  118. face.merge(image_bytes: media_payload[:image_bytes], frame_index: 0, timestamp_seconds: 0.0)
  119. end
  120. {
  121. detections: [ detection ],
  122. faces: faces,
  123. transcript_text: nil,
  124. duration_seconds: nil,
  125. processing_metadata: {
  126. source: "image_single_frame",
  127. detection_metadata: detection[:metadata]
  128. }
  129. }
  130. end
  # Processes a video story. The frame-change detector is consulted first:
  # when it reports "static_image" and supplies a frame, the story is handled
  # as a single image. Otherwise frames are extracted and run through face
  # detection one by one, and the audio track is extracted and transcribed.
  #
  # @param media_payload [Hash] reads :bytes, :story_id and :content_type
  # @return [Hash] :detections, :faces, :transcript_text, :duration_seconds
  #   and :processing_metadata
  def process_video_story(media_payload)
    story_id = media_payload[:story_id]
    video_args = {
      video_bytes: media_payload[:bytes],
      story_id: story_id,
      content_type: media_payload[:content_type]
    }

    mode = @video_frame_change_detector_service.classify(
      video_bytes: video_args[:video_bytes],
      reference_id: story_id,
      content_type: video_args[:content_type]
    )

    if mode[:processing_mode].to_s == "static_image" && mode[:frame_bytes].present?
      still = process_image_story(media_payload.merge(media_type: "image", image_bytes: mode[:frame_bytes]))
      still[:duration_seconds] = mode[:duration_seconds] if mode[:duration_seconds].to_f.positive?
      base_metadata = still[:processing_metadata].is_a?(Hash) ? still[:processing_metadata] : {}
      still[:processing_metadata] = base_metadata.merge(
        source: "video_static_single_frame",
        frame_change_detection: mode[:metadata]
      )
      return still
    end

    # Reuse the probe data the detector already gathered; only probe again
    # when it produced neither a positive duration nor a probe hash.
    embedded_probe = mode.dig(:metadata, :video_probe)
    probe =
      if mode[:duration_seconds].to_f.positive? || embedded_probe.is_a?(Hash)
        {
          duration_seconds: mode[:duration_seconds],
          metadata: embedded_probe.is_a?(Hash) ? embedded_probe : {}
        }
      else
        @video_metadata_service.probe(**video_args)
      end

    extraction = @video_frame_extraction_service.extract(**video_args)

    detections = []
    faces = []
    Array(extraction[:frames]).each do |frame|
      position = { frame_index: frame[:index], timestamp_seconds: frame[:timestamp_seconds] }
      found = @face_detection_service.detect(
        media_payload: { story_id: story_id, media_type: "image", image_bytes: frame[:image_bytes] }
      )
      detections << found.merge(position)
      faces.concat(
        Array(found[:faces]).map { |face| face.merge(image_bytes: frame[:image_bytes], **position) }
      )
    end

    audio = @video_audio_extraction_service.extract(**video_args)
    transcript = @speech_transcription_service.transcribe(audio_bytes: audio[:audio_bytes], story_id: story_id)

    {
      detections: detections,
      faces: faces,
      transcript_text: transcript[:transcript],
      duration_seconds: probe[:duration_seconds],
      processing_metadata: {
        source: "video_multistage",
        video_probe: probe[:metadata],
        frame_change_detection: mode[:metadata],
        frame_extraction: extraction[:metadata],
        audio_extraction: audio[:metadata],
        transcription: transcript[:metadata]
      }
    }
  end
  # Stores one InstagramStoryFace row per detected face. A face is stored as
  # "unlinked" (no person attached) when its confidence is below the linking
  # threshold, when no image bytes are available, or when the embedding
  # service returns an empty vector. Otherwise the embedding is matched to a
  # person and the face is stored as "matched".
  #
  # @param detected_faces [Array<Hash>, nil] faces as produced by detection
  # @param story_id [Object] identifier used for the embedding request and
  #   the observation signature
  # @param fallback_image_bytes [String, nil] used when a face carries no
  #   :image_bytes of its own
  def persist_faces!(detected_faces:, story_id:, fallback_image_bytes:)
    Array(detected_faces).each do |face|
      signature = face_observation_signature(story_id: story_id, face: face)
      record_unlinked = lambda do |reason|
        persist_unlinked_story_face!(face: face, observation_signature: signature, reason: reason)
      end

      unless linkable_face_confidence?(face[:confidence].to_f)
        record_unlinked.call("low_confidence")
        next
      end

      crop_bytes = face[:image_bytes].presence || fallback_image_bytes
      if crop_bytes.blank?
        record_unlinked.call("face_image_missing")
        next
      end

      embedding_result = @face_embedding_service.embed(
        media_payload: { story_id: story_id, media_type: "image", image_bytes: crop_bytes },
        face: face
      )
      vector = Array(embedding_result[:vector]).map(&:to_f)
      if vector.empty?
        record_unlinked.call("embedding_unavailable")
        next
      end

      match = @vector_matching_service.match_or_create!(
        account: @story.instagram_account,
        profile: @story.instagram_profile,
        embedding: vector,
        occurred_at: @story.taken_at || Time.current,
        observation_signature: signature
      )
      matched_person = match[:person]
      update_person_face_attributes!(person: matched_person, face: face)

      face_attributes = {
        instagram_story_person: matched_person,
        role: match[:role].to_s.presence || "unknown",
        detector_confidence: face[:confidence].to_f,
        match_similarity: match[:similarity],
        embedding_version: embedding_result[:version].to_s,
        embedding: vector,
        bounding_box: face[:bounding_box],
        metadata: story_face_metadata(face: face, observation_signature: signature, link_status: "matched")
      }
      # The embedding_vector column is only written when the table has it.
      face_attributes[:embedding_vector] = vector if InstagramStoryFace.column_names.include?("embedding_vector")
      @story.instagram_story_faces.create!(face_attributes)
    end
  end
  # Builds a colon-separated token identifying one face observation from the
  # story id, frame index, timestamp (rounded to 3 decimals) and the four
  # bounding-box corners.
  #
  # Corners are read by string key first and by symbol key as a fallback.
  # Without the fallback, a symbol-keyed box yields four blank corners, and
  # every face in the same frame ends up with the same signature.
  #
  # @param story_id [Object] story identifier, stringified
  # @param face [Hash] reads :bounding_box, :frame_index, :timestamp_seconds
  # @return [String] e.g. "story:42:1:0.123:1:2:3:4"
  def face_observation_signature(story_id:, face:)
    bbox = face[:bounding_box].is_a?(Hash) ? face[:bounding_box] : {}
    corners = %w[x1 y1 x2 y2].map { |key| bbox.fetch(key) { bbox[key.to_sym] } }

    [
      "story",
      story_id.to_s,
      face[:frame_index].to_i,
      face[:timestamp_seconds].to_f.round(3),
      *corners
    ].map(&:to_s).join(":")
  end
  # Tells whether a detector confidence reaches the minimum configured in
  # @match_min_confidence for linking a face to a person.
  #
  # @param confidence [#to_f] detector confidence
  # @return [Boolean]
  def linkable_face_confidence?(confidence)
    score = confidence.to_f
    score >= @match_min_confidence
  end
  # Stores a face row that is not attached to any person, recording in its
  # metadata why linking was skipped. Any StandardError raised while doing so
  # is swallowed and nil is returned instead.
  #
  # @param face [Hash] reads :confidence and :bounding_box
  # @param observation_signature [String] stored in the row metadata
  # @param reason [String] stored as link_skip_reason
  # @return [Object, nil] the created record, or nil when creation failed
  def persist_unlinked_story_face!(face:, observation_signature:, reason:)
    unlinked_attributes = {
      instagram_story_person: nil,
      role: "unknown",
      detector_confidence: face[:confidence].to_f,
      match_similarity: nil,
      embedding_version: nil,
      embedding: nil,
      bounding_box: face[:bounding_box],
      metadata: story_face_metadata(
        face: face,
        observation_signature: observation_signature,
        link_status: "unlinked",
        link_skip_reason: reason
      )
    }
    @story.instagram_story_faces.create!(unlinked_attributes)
  rescue StandardError
    nil
  end
  # Assembles the metadata hash stored alongside a story face. Entries whose
  # value is nil are dropped; :gender_score is always present because it is
  # coerced with to_f.
  #
  # @param face [Hash] detected face
  # @param observation_signature [String, nil]
  # @param link_status [String]
  # @param link_skip_reason [String, nil]
  # @return [Hash] symbol-keyed metadata without nil values
  def story_face_metadata(face:, observation_signature:, link_status:, link_skip_reason: nil)
    copied_keys = %i[frame_index timestamp_seconds landmarks likelihoods age age_range gender]
    copied = copied_keys.to_h { |key| [ key, face[key] ] }

    copied.merge(
      gender_score: face[:gender_score].to_f,
      observation_signature: observation_signature,
      link_status: link_status,
      link_skip_reason: link_skip_reason
    ).compact
  end
  # Obtains the story's media bytes, preferring the attached file and falling
  # back to downloading from the story's media URL, then classifies the media
  # as image or video.
  #
  # @return [Hash] :story_id, :media_type, :bytes, :content_type and
  #   :image_bytes (the bytes again for images, nil for videos)
  # @raise [RuntimeError] when no bytes could be obtained
  def load_media_payload
    payload_bytes, mime_type =
      if @story.media.attached?
        [ @story.media.download, @story.media.content_type.to_s ]
      else
        [ nil, nil ]
      end

    remote_url = media_download_url
    if payload_bytes.blank? && remote_url.present?
      payload_bytes = download_bytes!(remote_url)
      mime_type = infer_content_type_from_url(remote_url, fallback: mime_type)
    end
    raise "No media payload available for story_id=#{@story.story_id}" if payload_bytes.blank?

    resolved_type = infer_media_type(story_media_type: @story.media_type, content_type: mime_type)

    {
      story_id: @story.story_id,
      media_type: resolved_type,
      bytes: payload_bytes,
      content_type: mime_type,
      image_bytes: (resolved_type == "image" ? payload_bytes : nil)
    }
  end
  # Picks the first non-blank URL for the story. Videos try video_url,
  # media_url, then image_url; everything else tries image_url, then
  # media_url.
  #
  # @return [String, nil] the chosen URL, or nil when all candidates are blank
  def media_download_url
    candidates = @story.video? ? %i[video_url media_url image_url] : %i[image_url media_url]
    candidates.each do |attribute|
      url = @story.public_send(attribute).to_s.presence
      return url if url
    end
    nil
  end
  # Classifies media as "video" when either the story's own media type says so
  # or the content type starts with "video/"; everything else is "image".
  #
  # @param story_media_type [#to_s]
  # @param content_type [#to_s]
  # @return [String] "video" or "image"
  def infer_media_type(story_media_type:, content_type:)
    is_video = story_media_type.to_s == "video" || content_type.to_s.start_with?("video/")
    is_video ? "video" : "image"
  end
  # Guesses a MIME type for a media URL. A non-blank fallback always wins.
  #
  # The extension of the URL path (query string and fragment removed) is
  # checked first, so a parameter such as "?src=clip.mp4" cannot override the
  # type of "photo.jpg". When the path has no recognised extension, the whole
  # URL is scanned for a known marker as a last resort.
  #
  # @param url [#to_s] media URL
  # @param fallback [#to_s] content type already known, if any
  # @return [String] MIME type; "application/octet-stream" when nothing matches
  def infer_content_type_from_url(url, fallback:)
    return fallback.to_s if fallback.to_s.present?

    # Insertion order is the precedence used by the substring scan below.
    types_by_marker = {
      ".mp4" => "video/mp4",
      ".mov" => "video/quicktime",
      ".png" => "image/png",
      ".webp" => "image/webp",
      ".jpg" => "image/jpeg",
      ".jpeg" => "image/jpeg"
    }

    value = url.to_s.downcase
    path = value.split(/[?#]/, 2).first.to_s
    from_extension = types_by_marker[File.extname(path)]
    return from_extension if from_extension

    types_by_marker.each { |marker, type| return type if value.include?(marker) }
    "application/octet-stream"
  end
  # Fetches a media URL with a single GET request and returns the response
  # body. Sends an Instagram Referer header and uses an 8 second open timeout
  # and a 25 second read timeout.
  #
  # @param url [String] http or https URL
  # @return [String] response body
  # @raise [RuntimeError] when the URL is not http(s) or the response is not
  #   a success
  def download_bytes!(url)
    uri = URI.parse(url)
    raise "Invalid media URL" unless [ URI::HTTP, URI::HTTPS ].any? { |scheme_class| uri.is_a?(scheme_class) }

    request = Net::HTTP::Get.new(uri.request_uri)
    { "Accept" => "*/*", "Referer" => "https://www.instagram.com/" }.each do |header, value|
      request[header] = value
    end

    connection = Net::HTTP.new(uri.host, uri.port)
    connection.use_ssl = (uri.scheme == "https")
    connection.open_timeout = 8
    connection.read_timeout = 25

    response = connection.request(request)
    raise "Media download failed: HTTP #{response.code}" unless response.is_a?(Net::HTTPSuccess)

    response.body.to_s
  end
  # Folds one face observation into the person's metadata["face_attributes"]:
  # gender and age-range tallies with their most frequent value, a rolling
  # window of age samples with their mean, and a last-observed timestamp.
  # Any StandardError is swallowed and nil is returned.
  #
  # The age window keeps the 19 most recent samples plus the new one. It used
  # to keep the 19 oldest, so once 20 samples existed the estimate stopped
  # following newer observations.
  #
  # @param person [Object, nil] record with #metadata and #update_columns;
  #   nothing happens when nil
  # @param face [Hash] reads :gender, :age_range and :age
  # @return [Object, nil] result of update_columns, or nil
  def update_person_face_attributes!(person:, face:)
    return unless person

    metadata = person.metadata.is_a?(Hash) ? person.metadata.deep_dup : {}
    attrs = metadata["face_attributes"].is_a?(Hash) ? metadata["face_attributes"].deep_dup : {}

    gender = face[:gender].to_s.strip.downcase
    if gender.present?
      gender_counts = attrs["gender_counts"].is_a?(Hash) ? attrs["gender_counts"].deep_dup : {}
      gender_counts[gender] = gender_counts[gender].to_i + 1
      attrs["gender_counts"] = gender_counts
      attrs["primary_gender_cue"] = gender_counts.max_by { |_key, count| count.to_i }&.first
    end

    age_range = face[:age_range].to_s.strip
    if age_range.present?
      age_counts = attrs["age_range_counts"].is_a?(Hash) ? attrs["age_range_counts"].deep_dup : {}
      age_counts[age_range] = age_counts[age_range].to_i + 1
      attrs["age_range_counts"] = age_counts
      attrs["primary_age_range"] = age_counts.max_by { |_key, count| count.to_i }&.first
    end

    age_value = face[:age].to_f
    if age_value.positive?
      # Newest samples sit at the end, so trim from the front.
      samples = Array(attrs["age_samples"]).map(&:to_f).last(19)
      samples << age_value.round(1)
      attrs["age_samples"] = samples
      attrs["age_estimate"] = (samples.sum / samples.length.to_f).round(1)
    end

    attrs["last_observed_at"] = Time.current.iso8601
    metadata["face_attributes"] = attrs
    person.update_columns(metadata: metadata, updated_at: Time.current)
  rescue StandardError
    nil
  end
  # Marks the story as failed, records the error message and failure time in
  # its metadata, and broadcasts a story-archive refresh for the account.
  # Any StandardError raised along the way is swallowed.
  #
  # @param error_message [#to_s] stored under "processing_error"
  # @return [Object, nil] result of the broadcast, or nil when something failed
  def fail_story!(error_message:)
    previous_metadata = @story.metadata.is_a?(Hash) ? @story.metadata : {}
    failure_details = {
      "processing_error" => error_message.to_s,
      "failed_at" => Time.current.iso8601
    }

    @story.update(
      processing_status: "failed",
      processed: false,
      metadata: previous_metadata.merge(failure_details)
    )
    InstagramProfileEvent.broadcast_story_archive_refresh!(account: @story.instagram_account)
  rescue StandardError
    nil
  end
  433. end

app/services/user_profile_builder_service.rb

0.0% lines covered

100.0% branches covered

87 relevant lines. 0 lines covered and 87 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Rebuilds the behavioural profile of an Instagram profile from its
  # processed stories and the faces recorded for its stories and posts.
  class UserProfileBuilderService
    # Recomputes and saves the InstagramProfileBehaviorProfile for a profile.
    #
    # The "face_identity_profile", "related_individuals" and
    # "known_username_matches" entries of an existing summary are carried
    # over; everything else in the summary is rebuilt.
    #
    # @param profile [Object] profile record exposing the story, person and
    #   post-face associations used below
    # @return [Object, nil] the saved record, or nil when the profile has no
    #   processed stories
    def refresh!(profile:)
      processed_stories = profile.instagram_stories.processed.to_a
      return nil if processed_stories.empty?

      hour_counts = Hash.new(0)
      weekday_counts = Hash.new(0)
      locations = Hash.new(0)
      content_signals = Hash.new(0)
      topics = Hash.new(0)
      hashtags = Hash.new(0)
      sentiments = Hash.new(0)
      # Counts every present value of a list under its string form.
      bump_all = lambda do |counter, values|
        Array(values).each { |value| counter[value.to_s] += 1 if value.present? }
      end

      processed_stories.each do |story|
        posted_at = story.taken_at || story.created_at
        hour_counts[posted_at.hour] += 1
        weekday_counts[posted_at.wday] += 1

        story_meta = story.metadata.is_a?(Hash) ? story.metadata : {}
        bump_all.call(locations, story_meta["location_tags"])
        bump_all.call(content_signals, story_meta["content_signals"])

        understanding = story_meta["content_understanding"].is_a?(Hash) ? story_meta["content_understanding"] : {}
        bump_all.call(topics, understanding["topics"])
        bump_all.call(hashtags, understanding["hashtags"])

        mood = understanding["sentiment"].to_s.strip
        sentiments[mood] += 1 if mood.present?
      end

      story_person_counts = InstagramStoryFace.joins(:instagram_story)
        .where(instagram_stories: { instagram_profile_id: profile.id })
        .where.not(instagram_story_person_id: nil)
        .group(:instagram_story_person_id)
        .count
      post_person_counts = InstagramPostFace.joins(:instagram_profile_post)
        .where(instagram_profile_posts: { instagram_profile_id: profile.id })
        .where.not(instagram_story_person_id: nil)
        .group(:instagram_story_person_id)
        .count
      person_counts = story_person_counts.merge(post_person_counts) do |_person_id, from_stories, from_posts|
        from_stories.to_i + from_posts.to_i
      end

      people_rows = profile.instagram_story_people.where(id: person_counts.keys).map do |person|
        {
          person_id: person.id,
          role: person.role,
          label: person.label.to_s.presence,
          appearances: person_counts[person.id].to_i
        }.compact
      end
      top_people = people_rows.sort_by { |row| -row[:appearances] }.first(10)
      secondary_people = top_people.reject { |row| row[:role] == "primary_user" }

      score = activity_score(
        stories_count: processed_stories.length,
        active_hours_count: hour_counts.keys.length,
        secondary_person_mentions: secondary_people.sum { |row| row[:appearances].to_i }
      )

      record = InstagramProfileBehaviorProfile.find_or_initialize_by(instagram_profile: profile)
      existing_summary = record.behavioral_summary.is_a?(Hash) ? record.behavioral_summary.deep_dup : {}
      existing_metadata = record.metadata.is_a?(Hash) ? record.metadata.deep_dup : {}

      summary = {
        posting_time_pattern: {
          hour_histogram: hour_counts.sort.to_h,
          weekday_histogram: weekday_counts.sort.to_h
        },
        common_locations: sort_top(locations),
        frequent_secondary_persons: secondary_people,
        content_categories: sort_top(content_signals),
        topic_clusters: sort_top(topics),
        top_hashtags: sort_top(hashtags),
        sentiment_trend: sort_top(sentiments, limit: 5)
      }
      if existing_summary["face_identity_profile"].is_a?(Hash)
        summary["face_identity_profile"] = existing_summary["face_identity_profile"]
      end
      if existing_summary["related_individuals"].present?
        summary["related_individuals"] = Array(existing_summary["related_individuals"])
      end
      if existing_summary["known_username_matches"].present?
        summary["known_username_matches"] = Array(existing_summary["known_username_matches"])
      end

      record.activity_score = score
      record.behavioral_summary = summary
      record.metadata = existing_metadata.merge(
        stories_processed: processed_stories.length,
        post_faces_processed: profile.instagram_post_faces.count,
        refreshed_at: Time.current.iso8601
      )
      record.save!
      record
    end

    private

    # Returns the entries of a counter hash ordered by descending count,
    # limited to the first `limit` entries.
    def sort_top(count_hash, limit: 20)
      ranked = count_hash.sort_by { |pair| -pair.last }
      ranked.first(limit).to_h
    end

    # Averages three ratios, each capped at 1.0: stories out of 30, active
    # hours out of 24 and secondary-person mentions out of 20. Rounded to
    # four decimals.
    def activity_score(stories_count:, active_hours_count:, secondary_person_mentions:)
      capped_ratio = ->(value, ceiling) { [ value.to_f / ceiling, 1.0 ].min }

      volume = capped_ratio.call(stories_count, 30.0)
      hourly_diversity = capped_ratio.call(active_hours_count, 24.0)
      social = capped_ratio.call(secondary_person_mentions, 20.0)
      ((volume + hourly_diversity + social) / 3.0).round(4)
    end
  end

app/services/vector_matching_service.rb

0.0% lines covered

100.0% branches covered

176 relevant lines. 0 lines covered and 176 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  # Matches face embeddings against the people already stored for a profile
  # and creates a new person when no stored embedding is similar enough.
  class VectorMatchingService
    # Similarity used when no positive threshold is supplied.
    DEFAULT_THRESHOLD = 0.85

    # @param threshold [Numeric, nil] minimum similarity for a match; nil or
    #   non-positive values fall back to DEFAULT_THRESHOLD
    def initialize(threshold: nil)
      @threshold = threshold.to_f.positive? ? threshold.to_f : DEFAULT_THRESHOLD
    end

    # Finds the best stored person for an embedding. When its similarity
    # reaches the threshold the observation is folded into that person;
    # otherwise a new "secondary_person" is created from the embedding.
    #
    # @param account [Object] owner account for a newly created person
    # @param profile [Object] profile whose people are searched
    # @param embedding [Array<Numeric>] face embedding
    # @param occurred_at [Time] observation time
    # @param observation_signature [String, nil] token used to detect a
    #   repeated observation of the same face
    # @return [Hash] :person, :matched, :similarity, :role, :observation_recorded
    # @raise [ArgumentError] when the embedding is empty, has zero norm, or
    #   contains non-finite values
    def match_or_create!(account:, profile:, embedding:, occurred_at: Time.current, observation_signature: nil)
      vector = normalize(embedding)
      raise ArgumentError, "embedding vector is required" if vector.empty?

      best = best_match(profile: profile, vector: vector)
      if best && best[:similarity] >= @threshold
        person = best[:person]
        observation = upsert_person_embedding!(
          person: person,
          vector: vector,
          occurred_at: occurred_at,
          observation_signature: observation_signature
        )
        role = person.role == "primary_user" ? "primary_user" : "secondary_person"
        return {
          person: person,
          matched: true,
          similarity: best[:similarity],
          role: role,
          observation_recorded: observation[:recorded]
        }
      end

      normalized_signature = normalize_observation_signature(observation_signature)
      metadata = { source: "auto_cluster" }
      if normalized_signature.present?
        metadata["observation_signatures"] = [ normalized_signature ]
        metadata["observation_signatures_count"] = 1
        metadata["last_observation_signature"] = normalized_signature
      end

      attrs = {
        instagram_account: account,
        instagram_profile: profile,
        role: "secondary_person",
        first_seen_at: occurred_at,
        last_seen_at: occurred_at,
        appearance_count: 1,
        canonical_embedding: vector,
        metadata: metadata
      }
      attrs[:canonical_embedding_vector] = vector if pgvector_column_available?
      person = InstagramStoryPerson.create!(attrs)
      person.sync_identity_confidence!

      {
        person: person,
        matched: false,
        similarity: best&.dig(:similarity),
        role: person.role,
        observation_recorded: true
      }
    end

    # Creates or updates the single "primary_user" person of a profile and
    # replaces its canonical embedding with the given one.
    #
    # @param label [String, nil] assigned only when present
    # @return [Object] the saved person
    # @raise [ArgumentError] when the embedding is empty, has zero norm, or
    #   contains non-finite values
    def upsert_primary_person!(account:, profile:, embedding:, occurred_at: Time.current, label: nil)
      vector = normalize(embedding)
      raise ArgumentError, "embedding vector is required" if vector.empty?

      person = InstagramStoryPerson.find_or_initialize_by(
        instagram_account: account,
        instagram_profile: profile,
        role: "primary_user"
      )
      person.label = label if label.present?
      person.first_seen_at ||= occurred_at
      person.last_seen_at = [ person.last_seen_at, occurred_at ].compact.max
      person.appearance_count = [ person.appearance_count.to_i, 1 ].max
      person.canonical_embedding = vector
      person.canonical_embedding_vector = vector if person.respond_to?(:canonical_embedding_vector=)
      person.metadata = (person.metadata.is_a?(Hash) ? person.metadata : {}).merge("source" => "primary_seed")
      person.save!
      person.sync_identity_confidence!
      person
    end

    private

    # Returns { person:, similarity: } for the closest person that is active
    # for matching, or nil when there is no candidate. Uses the database
    # vector column when available, otherwise compares in Ruby.
    def best_match(profile:, vector:)
      if pgvector_enabled?
        # vector_literal emits only formatted floats, so interpolating it
        # into the SQL below is safe.
        vector_sql = vector_literal(vector)
        query = profile.instagram_story_people.where.not(canonical_embedding_vector: nil)
        return nil unless query.exists?

        # Only the 25 nearest rows are inspected for an active person.
        person = query
          .select(Arel.sql("instagram_story_people.*, (1 - (canonical_embedding_vector <=> '#{vector_sql}'::vector)) AS similarity_score"))
          .order(Arel.sql("canonical_embedding_vector <=> '#{vector_sql}'::vector"))
          .limit(25)
          .to_a
          .find(&:active_for_matching?)
        return nil unless person

        return { person: person, similarity: person.attributes["similarity_score"].to_f }
      end

      candidates = profile.instagram_story_people.where.not(canonical_embedding: nil).to_a.select(&:active_for_matching?)
      return nil if candidates.empty?

      candidates.map do |person|
        other = normalize(person.canonical_embedding)
        # Embeddings of a different dimension cannot be compared.
        next nil if other.length != vector.length

        { person: person, similarity: cosine_similarity(vector, other) }
      end.compact.max_by { |item| item[:similarity] }
    end

    # Folds a new observation into an existing person: the canonical
    # embedding becomes the appearance-weighted mean of the old and new
    # vectors (re-normalised) and the appearance count grows by one. An
    # observation whose signature is already recorded only updates the
    # seen-at timestamps.
    #
    # @return [Hash] :recorded and :duplicate flags
    def upsert_person_embedding!(person:, vector:, occurred_at:, observation_signature:)
      current_count = person.appearance_count.to_i
      current = normalize(person.canonical_embedding)
      metadata = person.metadata.is_a?(Hash) ? person.metadata.deep_dup : {}
      normalized_signature = normalize_observation_signature(observation_signature)
      known_signatures = Array(metadata["observation_signatures"]).map(&:to_s).reject(&:blank?)
      duplicate_observation = normalized_signature.present? && known_signatures.include?(normalized_signature)

      if duplicate_observation
        attrs = {
          first_seen_at: person.first_seen_at || occurred_at,
          last_seen_at: [ person.last_seen_at, occurred_at ].compact.max
        }
        person.update!(attrs)
        return { recorded: false, duplicate: true }
      end

      updated_vector =
        if current.length == vector.length && current_count.positive?
          merged = current.each_with_index.map do |value, idx|
            ((value * current_count) + vector[idx]) / (current_count + 1)
          end
          normalize(merged)
        else
          # No usable stored embedding: start over from the new vector.
          vector
        end

      attrs = {
        canonical_embedding: updated_vector,
        appearance_count: current_count + 1,
        first_seen_at: person.first_seen_at || occurred_at,
        last_seen_at: [ person.last_seen_at, occurred_at ].compact.max
      }
      if normalized_signature.present?
        # Only the 400 most recent signatures are retained.
        updated_signatures = (known_signatures << normalized_signature).uniq.last(400)
        metadata["observation_signatures"] = updated_signatures
        metadata["observation_signatures_count"] = updated_signatures.length
        metadata["last_observation_signature"] = normalized_signature
        attrs[:metadata] = metadata
      end
      attrs[:canonical_embedding_vector] = updated_vector if person.respond_to?(:canonical_embedding_vector=)
      person.update!(attrs)
      person.sync_identity_confidence!

      { recorded: true, duplicate: false }
    end

    # Cosine similarity of two numeric arrays, iterating over the indices of
    # `a`. Returns 0.0 when either magnitude is zero.
    def cosine_similarity(a, b)
      dot = 0.0
      mag_a = 0.0
      mag_b = 0.0
      a.each_with_index do |left, idx|
        right = b[idx].to_f
        dot += left * right
        mag_a += left * left
        mag_b += right * right
      end
      denom = Math.sqrt(mag_a) * Math.sqrt(mag_b)
      return 0.0 if denom <= 0.0

      dot / denom
    end

    # Scales a vector to unit length. Returns [] for empty input, a zero
    # norm, or a non-finite norm (NaN or Infinity among the values, or an
    # overflow). The non-finite case used to slip through and yield a vector
    # of NaN values.
    def normalize(values)
      vector = Array(values).map(&:to_f)
      return [] if vector.empty?

      norm = Math.sqrt(vector.sum { |value| value * value })
      return [] unless norm.finite? && norm.positive?

      vector.map { |value| value / norm }
    end

    # True when the connection adapter is PostgreSQL and the vector column
    # exists; false when that cannot be determined.
    def pgvector_enabled?
      return false unless ActiveRecord::Base.connection.adapter_name.to_s.downcase.include?("postgresql")

      pgvector_column_available?
    rescue StandardError
      false
    end

    def pgvector_column_available?
      InstagramStoryPerson.column_names.include?("canonical_embedding_vector")
    end

    # Formats a vector as "[v1,v2,...]" with eight decimals per component.
    def vector_literal(vector)
      "[" + vector.map { |value| format("%.8f", value.to_f) }.join(",") + "]"
    end

    # Trims a signature and caps it at 255 bytes; blank input gives nil.
    # A byte-level cut can split a multibyte character, so invalid byte
    # sequences are removed to keep the token valid in its encoding.
    def normalize_observation_signature(value)
      token = value.to_s.strip
      return nil if token.blank?

      token.byteslice(0, 255).scrub("")
    end
  end

app/services/video_audio_extraction_service.rb

0.0% lines covered

100.0% branches covered

62 relevant lines. 0 lines covered and 62 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "open3"
  2. require "shellwords"
  3. require "tempfile"
  4. require "tmpdir"
  # Extracts the audio track of a video as mono 16 kHz WAV by invoking ffmpeg.
  class VideoAudioExtractionService
    # @param ffmpeg_bin [String, nil] ffmpeg executable; when blank, the
    #   FFMPEG_BIN environment variable and then a default are used
    def initialize(ffmpeg_bin: nil)
      @ffmpeg_bin = (ffmpeg_bin.to_s.presence || ENV["FFMPEG_BIN"].to_s.presence || default_ffmpeg_bin).to_s
    end

    # Writes the video to a temporary file, runs ffmpeg on it and reads back
    # the produced WAV file.
    #
    # @param video_bytes [String] raw video data
    # @param story_id [Object] used in temporary file and directory names
    # @param content_type [String, nil] selects the temporary file extension
    # @return [Hash] :audio_bytes, :content_type and :metadata; on any failure
    #   the first two are nil and metadata carries a :reason
    def extract(video_bytes:, story_id:, content_type: nil)
      return empty_result("video_bytes_missing") if video_bytes.blank?
      return empty_result("ffmpeg_missing") unless command_available?(@ffmpeg_bin)

      suffix = extension_for(content_type: content_type)
      Tempfile.create([ "story_video_#{story_id}", suffix ]) do |input|
        input.binmode
        input.write(video_bytes)
        input.flush

        Dir.mktmpdir("story_audio_#{story_id}_") do |workdir|
          wav_path = File.join(workdir, "audio.wav")
          _out, err, status = Open3.capture3(
            @ffmpeg_bin, "-hide_banner", "-loglevel", "error",
            "-i", input.path,
            "-vn", "-ac", "1", "-ar", "16000", "-f", "wav", wav_path
          )

          if !status.success?
            empty_result("ffmpeg_audio_extract_failed", stderr: err.to_s)
          elsif !File.exist?(wav_path)
            empty_result("audio_not_found")
          else
            {
              audio_bytes: File.binread(wav_path),
              content_type: "audio/wav",
              metadata: { source: "ffmpeg" }
            }
          end
        end
      end
    rescue StandardError => e
      empty_result("audio_extraction_error", stderr: e.message)
    end

    private

    # Asks the shell whether the command can be resolved.
    def command_available?(command)
      system("command -v #{Shellwords.escape(command)} >/dev/null 2>&1")
    end

    # Prefers ~/.local/bin/ffmpeg when that file exists.
    def default_ffmpeg_bin
      candidate = File.expand_path("~/.local/bin/ffmpeg")
      File.exist?(candidate) ? candidate : "ffmpeg"
    end

    # Maps a content type to a file extension; unknown types get ".mp4".
    def extension_for(content_type:)
      mime = content_type.to_s.downcase
      { "mp4" => ".mp4", "quicktime" => ".mov", "webm" => ".webm" }.each do |needle, extension|
        return extension if mime.include?(needle)
      end
      ".mp4"
    end

    # Result returned when no audio could be produced.
    def empty_result(reason, stderr: nil)
      details = { source: "ffmpeg", reason: reason, stderr: stderr.to_s.presence }.compact
      { audio_bytes: nil, content_type: nil, metadata: details }
    end
  end

app/services/video_frame_change_detector_service.rb

0.0% lines covered

100.0% branches covered

203 relevant lines. 0 lines covered and 203 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "open3"
  2. require "shellwords"
  3. require "tempfile"
  # Decides whether a video is effectively a still image by comparing a few
  # small grayscale frames sampled across its duration.
  class VideoFrameChangeDetectorService
    DEFAULT_SAMPLE_FRAMES = 3
    DEFAULT_DIFF_THRESHOLD = 2.5
    GRAYSCALE_WIDTH = 32
    GRAYSCALE_HEIGHT = 32

    # @param ffmpeg_bin [String, nil] ffmpeg executable; when blank, the
    #   FFMPEG_BIN environment variable and then a default are used
    # @param sample_frames [Integer, nil] frames to sample; non-positive
    #   values fall back to DEFAULT_SAMPLE_FRAMES
    # @param diff_threshold [Numeric, nil] largest mean difference still
    #   treated as static; non-positive values fall back to
    #   DEFAULT_DIFF_THRESHOLD
    # @param video_metadata_service [#probe] supplies the video duration
    def initialize(
      ffmpeg_bin: nil,
      sample_frames: nil,
      diff_threshold: nil,
      video_metadata_service: VideoMetadataService.new
    )
      @ffmpeg_bin = (ffmpeg_bin.to_s.presence || ENV["FFMPEG_BIN"].to_s.presence || default_ffmpeg_bin).to_s

      requested_frames = sample_frames.to_i
      @sample_frames = requested_frames.positive? ? requested_frames : DEFAULT_SAMPLE_FRAMES

      requested_threshold = diff_threshold.to_f
      @diff_threshold = requested_threshold.positive? ? requested_threshold : DEFAULT_DIFF_THRESHOLD

      @video_metadata_service = video_metadata_service
    end

    # Samples grayscale frames, measures how much they differ and reports
    # "static_image" when the largest difference stays within the threshold,
    # "dynamic_video" otherwise. Every failure path also reports
    # "dynamic_video", with a :reason in the metadata.
    #
    # @param video_bytes [String] raw video data
    # @param reference_id [Object] used in the temporary file name and passed
    #   to the metadata probe as story_id
    # @param content_type [String, nil] selects the temporary file extension
    # @return [Hash] :static, :processing_mode, :frame_bytes,
    #   :frame_content_type, :duration_seconds and :metadata
    def classify(video_bytes:, reference_id:, content_type: nil)
      return empty_result(reason: "video_bytes_missing") if video_bytes.blank?
      return empty_result(reason: "ffmpeg_missing") unless command_available?(@ffmpeg_bin)

      suffix = extension_for(content_type: content_type)
      Tempfile.create([ "video_change_detect_#{reference_id}", suffix ]) do |video_file|
        video_file.binmode
        video_file.write(video_bytes)
        video_file.flush

        probe = @video_metadata_service.probe(
          video_bytes: video_bytes,
          story_id: reference_id,
          content_type: content_type
        )
        duration = probe[:duration_seconds]
        requested_timestamps = sample_timestamps(duration_seconds: duration)

        # Timestamps for which no frame could be read are dropped.
        samples = requested_timestamps.each_with_object([]) do |timestamp, kept|
          gray = extract_grayscale_frame(video_path: video_file.path, timestamp_seconds: timestamp)
          kept << { timestamp_seconds: timestamp, gray_bytes: gray } unless gray.blank?
        end

        primary_timestamp = samples.first&.dig(:timestamp_seconds) || 0.0
        representative = extract_jpeg_frame(video_path: video_file.path, timestamp_seconds: primary_timestamp)

        if samples.length < 2
          empty_result(
            reason: "insufficient_sample_frames",
            frame_bytes: representative,
            duration_seconds: duration,
            metadata: {
              sampled_timestamps: requested_timestamps,
              sampled_frames: samples.length,
              video_probe: probe[:metadata]
            }
          )
        else
          diffs = compute_frame_diffs(samples: samples)
          max_diff = diffs.max.to_f
          avg_diff = (diffs.sum.to_f / diffs.length.to_f).round(4)
          static = max_diff <= @diff_threshold

          {
            static: static,
            processing_mode: static ? "static_image" : "dynamic_video",
            frame_bytes: representative,
            frame_content_type: representative.present? ? "image/jpeg" : nil,
            duration_seconds: duration,
            metadata: {
              sampled_timestamps: samples.map { |row| row[:timestamp_seconds] },
              sampled_frames: samples.length,
              diff_threshold: @diff_threshold,
              max_mean_diff: max_diff.round(4),
              avg_mean_diff: avg_diff,
              frame_diffs: diffs.map { |value| value.round(4) },
              video_probe: probe[:metadata]
            }
          }
        end
      end
    rescue StandardError => e
      empty_result(
        reason: "frame_change_detection_error",
        metadata: {
          error_class: e.class.name,
          error_message: e.message.to_s
        }
      )
    end

    private

    # Chooses the timestamps to sample: start, middle and shortly before the
    # end for a known duration, fixed offsets otherwise. For a known duration
    # the list is padded in 0.5 second steps until it holds at least two
    # entries (and at least @sample_frames).
    def sample_timestamps(duration_seconds:)
      duration = duration_seconds.to_f
      return [ 0.0, 0.8, 1.6 ].first(@sample_frames).uniq if duration <= 0.0

      near_end = [ duration - 0.12, 0.0 ].max
      halfway = duration / 2.0
      wanted = [ @sample_frames, 2 ].max

      points = [ 0.0, halfway, near_end ].first(@sample_frames).map { |value| value.round(3) }.uniq
      until points.length >= wanted
        points |= [ (points.last.to_f + 0.5).round(3) ]
      end
      points
    end

    # Reads one frame as raw 8-bit grayscale scaled to GRAYSCALE_WIDTH x
    # GRAYSCALE_HEIGHT. Returns nil when ffmpeg fails or the output does not
    # have exactly one byte per pixel.
    def extract_grayscale_frame(video_path:, timestamp_seconds:)
      raw, status = capture_single_frame(
        video_path: video_path,
        timestamp_seconds: timestamp_seconds,
        output_args: [
          "-vf", "scale=#{GRAYSCALE_WIDTH}:#{GRAYSCALE_HEIGHT},format=gray",
          "-f", "rawvideo",
          "-pix_fmt", "gray"
        ]
      )
      return nil unless status.success?
      return nil unless raw.bytesize == (GRAYSCALE_WIDTH * GRAYSCALE_HEIGHT)

      raw
    rescue StandardError
      nil
    end

    # Reads one frame as JPEG. Returns nil when ffmpeg fails or produces no
    # output.
    def extract_jpeg_frame(video_path:, timestamp_seconds:)
      jpeg, status = capture_single_frame(
        video_path: video_path,
        timestamp_seconds: timestamp_seconds,
        output_args: [ "-q:v", "2", "-f", "image2pipe", "-vcodec", "mjpeg" ]
      )
      return nil unless status.success?
      return nil if jpeg.blank?

      jpeg
    rescue StandardError
      nil
    end

    # Runs ffmpeg to write a single frame at the given timestamp to stdout.
    # Returns the captured stdout and the process status.
    def capture_single_frame(video_path:, timestamp_seconds:, output_args:)
      command = [
        @ffmpeg_bin,
        "-hide_banner",
        "-loglevel", "error",
        "-ss", format("%.3f", timestamp_seconds.to_f),
        "-i", video_path.to_s,
        "-frames:v", "1",
        *output_args,
        "pipe:1"
      ]
      stdout, _stderr, status = Open3.capture3(*command)
      [ stdout, status ]
    end

    # Mean absolute differences of every later sample against the first one,
    # followed by those of each pair of neighbouring samples.
    def compute_frame_diffs(samples:)
      rows = Array(samples)
      return [] if rows.length < 2

      baseline = rows.first[:gray_bytes]
      against_first = rows.drop(1).map { |row| mean_abs_diff(baseline, row[:gray_bytes]) }
      between_neighbours = rows.each_cons(2).map do |previous, current|
        mean_abs_diff(previous[:gray_bytes], current[:gray_bytes])
      end
      against_first + between_neighbours
    end

    # Mean absolute byte difference over the shorter of the two strings;
    # 255.0 when either is empty.
    def mean_abs_diff(bytes_a, bytes_b)
      left = bytes_a.to_s
      right = bytes_b.to_s
      length = [ left.bytesize, right.bytesize ].min
      return 255.0 if length <= 0

      total = (0...length).sum { |index| (left.getbyte(index).to_i - right.getbyte(index).to_i).abs }
      total.to_f / length.to_f
    end

    # Asks the shell whether the command can be resolved.
    def command_available?(command)
      system("command -v #{Shellwords.escape(command)} >/dev/null 2>&1")
    end

    # Prefers ~/.local/bin/ffmpeg when that file exists.
    def default_ffmpeg_bin
      candidate = File.expand_path("~/.local/bin/ffmpeg")
      File.exist?(candidate) ? candidate : "ffmpeg"
    end

    # Maps a content type to a file extension; unknown types get ".mp4".
    def extension_for(content_type:)
      mime = content_type.to_s.downcase
      { "mp4" => ".mp4", "quicktime" => ".mov", "webm" => ".webm" }.each do |needle, extension|
        return extension if mime.include?(needle)
      end
      ".mp4"
    end

    # Result used whenever no static/dynamic decision could be made.
    def empty_result(reason:, frame_bytes: nil, duration_seconds: nil, metadata: {})
      details = { reason: reason }.merge(metadata.to_h)
      {
        static: nil,
        processing_mode: "dynamic_video",
        frame_bytes: frame_bytes,
        frame_content_type: frame_bytes.present? ? "image/jpeg" : nil,
        duration_seconds: duration_seconds,
        metadata: details
      }
    end
  end

app/services/video_frame_extraction_service.rb

0.0% lines covered

100.0% branches covered

74 relevant lines. 0 lines covered and 74 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "open3"
  2. require "shellwords"
  3. require "tempfile"
  4. require "tmpdir"
  # Samples still frames from a video at a fixed interval by shelling out to
  # ffmpeg.
  #
  # The video bytes are spooled to a temp file, ffmpeg writes one JPEG per
  # sampling interval into a scratch directory, and at most `max_frames`
  # frames are read back and returned as raw bytes.
  class VideoFrameExtractionService
    DEFAULT_INTERVAL_SECONDS = 2.0
    DEFAULT_MAX_FRAMES = 24
    # Lower bound for the sampling interval handed to ffmpeg's fps filter.
    MIN_INTERVAL_SECONDS = 0.2

    # @param ffmpeg_bin [String, nil] ffmpeg executable; falls back to
    #   ENV["FFMPEG_BIN"], then ~/.local/bin/ffmpeg when present, then "ffmpeg"
    # @param interval_seconds [Numeric, nil] seconds between sampled frames;
    #   non-positive values fall back to ENV["VIDEO_FRAME_INTERVAL_SECONDS"]
    # @param max_frames [Integer, nil] maximum number of frames returned;
    #   non-positive values fall back to ENV["VIDEO_MAX_FRAMES"]
    def initialize(ffmpeg_bin: nil, interval_seconds: nil, max_frames: nil)
      resolved_bin = ffmpeg_bin.to_s.presence || ENV["FFMPEG_BIN"].to_s.presence || default_ffmpeg_bin
      @ffmpeg_bin = resolved_bin.to_s
      @interval_seconds = resolve_interval(interval_seconds)
      @max_frames = resolve_max_frames(max_frames)
    end

    # Extracts frames from the given video bytes.
    #
    # @param video_bytes [String] raw video data
    # @param story_id [#to_s] used only to name the temp file and directory
    # @param content_type [String, nil] MIME type used to pick the temp file extension
    # @return [Hash] `{ frames: [...], metadata: {...} }`; each frame is
    #   `{ index:, timestamp_seconds:, image_bytes: }`. On any failure `frames`
    #   is empty and `metadata[:reason]` names the cause.
    def extract(video_bytes:, story_id:, content_type: nil)
      return empty_result("video_bytes_missing") if video_bytes.blank?
      return empty_result("ffmpeg_missing") unless command_available?(@ffmpeg_bin)

      Tempfile.create([ "story_video_#{story_id}", extension_for(content_type: content_type) ]) do |video_file|
        video_file.binmode
        video_file.write(video_bytes)
        video_file.flush

        Dir.mktmpdir("story_frames_#{story_id}_") do |output_dir|
          _stdout, stderr, status = Open3.capture3(*extraction_command(video_file.path, output_dir))
          return empty_result("ffmpeg_extract_failed", stderr: stderr.to_s) unless status.success?

          files = Dir[File.join(output_dir, "frame_*.jpg")].sort.first(@max_frames)
          frames = files.each_with_index.map do |path, idx|
            {
              index: idx,
              # Uses the same (clamped) interval that ffmpeg sampled with.
              timestamp_seconds: (idx * @interval_seconds).round(2),
              image_bytes: File.binread(path)
            }
          end

          {
            frames: frames,
            metadata: {
              source: "ffmpeg",
              interval_seconds: @interval_seconds,
              extracted_frames: files.length
            }
          }
        end
      end
    rescue StandardError => e
      empty_result("frame_extraction_error", stderr: e.message)
    end

    private

    # Picks the sampling interval (argument first, then environment) and clamps
    # it to MIN_INTERVAL_SECONDS so the fps filter, the reported timestamps and
    # the metadata all agree on one value.
    def resolve_interval(value)
      requested = value.to_f
      unless requested.positive?
        requested = ENV.fetch("VIDEO_FRAME_INTERVAL_SECONDS", DEFAULT_INTERVAL_SECONDS).to_f
      end
      [ requested, MIN_INTERVAL_SECONDS ].max
    end

    # Picks the frame cap (argument first, then environment). A non-positive
    # or unparsable environment value falls back to DEFAULT_MAX_FRAMES instead
    # of silently disabling extraction.
    def resolve_max_frames(value)
      requested = value.to_i
      requested = ENV.fetch("VIDEO_MAX_FRAMES", DEFAULT_MAX_FRAMES).to_i unless requested.positive?
      requested.positive? ? requested : DEFAULT_MAX_FRAMES
    end

    # Builds the ffmpeg argv. `-frames:v` stops ffmpeg once the frame cap is
    # reached so long videos are not decoded to the end for frames that would
    # be discarded anyway.
    def extraction_command(video_path, output_dir)
      [
        @ffmpeg_bin,
        "-hide_banner",
        "-loglevel", "error",
        "-i", video_path,
        "-vf", "fps=#{format('1/%.2f', @interval_seconds)}",
        "-frames:v", @max_frames.to_s,
        "-q:v", "2",
        File.join(output_dir, "frame_%05d.jpg")
      ]
    end

    # True when the shell can resolve the given executable.
    def command_available?(command)
      system("command -v #{Shellwords.escape(command)} >/dev/null 2>&1")
    end

    # Prefers ~/.local/bin/ffmpeg when that file exists, else plain "ffmpeg".
    def default_ffmpeg_bin
      local_bin = File.expand_path("~/.local/bin/ffmpeg")
      return local_bin if File.exist?(local_bin)

      "ffmpeg"
    end

    # Temp file extension for the given MIME type; defaults to ".mp4".
    def extension_for(content_type:)
      value = content_type.to_s.downcase
      return ".mp4" if value.include?("mp4")
      return ".mov" if value.include?("quicktime")
      return ".webm" if value.include?("webm")

      ".mp4"
    end

    # Failure-shaped result: no frames, the reason, and stderr when non-blank.
    def empty_result(reason, stderr: nil)
      {
        frames: [],
        metadata: {
          source: "ffmpeg",
          reason: reason,
          stderr: stderr.to_s.presence
        }.compact
      }
    end
  end

app/services/video_metadata_service.rb

0.0% lines covered

100.0% branches covered

82 relevant lines. 0 lines covered and 82 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "open3"
  2. require "shellwords"
  3. require "tempfile"
  4. require "json"
  # Reads container and stream metadata for a video by running ffprobe against
  # a temp copy of the bytes and parsing its JSON output.
  class VideoMetadataService
    def initialize(ffprobe_bin: ENV.fetch("FFPROBE_BIN", "ffprobe"))
      @ffprobe_bin = ffprobe_bin.to_s
    end

    # Probes the given video bytes.
    #
    # Returns `{ duration_seconds:, metadata: }`. On failure `duration_seconds`
    # is nil and `metadata[:reason]` names the cause.
    def probe(video_bytes:, story_id:, content_type: nil)
      if video_bytes.blank?
        return { duration_seconds: nil, metadata: { reason: "video_bytes_missing" } }
      end
      unless command_available?(@ffprobe_bin)
        return { duration_seconds: nil, metadata: { reason: "ffprobe_missing" } }
      end

      Tempfile.create([ "story_probe_#{story_id}", extension_for(content_type: content_type) ]) do |spool|
        spool.binmode
        spool.write(video_bytes)
        spool.flush

        probe_output, probe_errors, exit_status = Open3.capture3(
          @ffprobe_bin,
          "-v", "error",
          "-show_entries", "format=duration:stream=index,codec_type,codec_name,width,height,avg_frame_rate,channels,sample_rate",
          "-of", "json",
          spool.path
        )
        unless exit_status.success?
          failure_details = { reason: "ffprobe_failed", stderr: probe_errors.to_s.presence }.compact
          return { duration_seconds: nil, metadata: failure_details }
        end

        document = JSON.parse(probe_output.to_s.presence || "{}")
        stream_rows = Array(document["streams"]).select { |row| row.is_a?(Hash) }
        container = document["format"].is_a?(Hash) ? document["format"] : {}
        audio = stream_rows.find { |row| row["codec_type"].to_s == "audio" }
        video = stream_rows.find { |row| row["codec_type"].to_s == "video" }
        seconds = container["duration"].to_f

        details = {
          source: "ffprobe",
          has_audio: audio.present?,
          audio_codec: audio && audio["codec_name"],
          channels: audio && audio["channels"],
          sample_rate_hz: integer_or_nil(audio && audio["sample_rate"]),
          video_codec: video && video["codec_name"],
          width: integer_or_nil(video && video["width"]),
          height: integer_or_nil(video && video["height"]),
          fps: frame_rate_to_float(video && video["avg_frame_rate"]),
          stream_count: stream_rows.length
        }

        {
          duration_seconds: (seconds.positive? ? seconds.round(2) : nil),
          metadata: details.compact
        }
      end
    rescue StandardError => e
      { duration_seconds: nil, metadata: { reason: "video_probe_error", stderr: e.message } }
    end

    private

    # True when the shell can resolve the given executable.
    def command_available?(command)
      escaped = Shellwords.escape(command)
      system("command -v #{escaped} >/dev/null 2>&1")
    end

    # Temp file extension for the given MIME type; defaults to ".mp4".
    def extension_for(content_type:)
      mime = content_type.to_s.downcase
      if mime.include?("mp4")
        ".mp4"
      elsif mime.include?("quicktime")
        ".mov"
      elsif mime.include?("webm")
        ".webm"
      else
        ".mp4"
      end
    end

    # Parses the value as an integer; anything that is not strictly positive
    # becomes nil.
    def integer_or_nil(value)
      parsed = value.to_s.to_i
      parsed if parsed.positive?
    end

    # Converts a frame-rate string ("30000/1001" or "29.97") into a float
    # rounded to three decimals; nil for blank, zero or malformed values.
    def frame_rate_to_float(value)
      text = value.to_s
      return nil if text.blank?

      unless text.include?("/")
        plain = text.to_f
        return plain.positive? ? plain.round(3) : nil
      end

      top, bottom = text.split("/", 2).map(&:to_f)
      return nil if bottom.to_f <= 0.0

      (top / bottom).round(3)
    rescue StandardError
      nil
    end
  end

app/services/video_thumbnail_service.rb

0.0% lines covered

100.0% branches covered

87 relevant lines. 0 lines covered and 87 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "digest"
  2. require "open3"
  3. require "shellwords"
  4. require "tempfile"
  # Produces a single JPEG thumbnail from a video by asking ffmpeg for one
  # frame shortly after the start of the stream.
  class VideoThumbnailService
    DEFAULT_SEEK_SECONDS = 0.2
    MAX_THUMBNAIL_BYTES = 3 * 1024 * 1024

    # @param ffmpeg_bin [String, nil] ffmpeg executable; falls back to
    #   ENV["FFMPEG_BIN"], then ~/.local/bin/ffmpeg when present, then "ffmpeg"
    # @param seek_seconds [Numeric, nil] position of the grabbed frame;
    #   non-positive values fall back to DEFAULT_SEEK_SECONDS
    def initialize(ffmpeg_bin: nil, seek_seconds: nil)
      resolved_bin = ffmpeg_bin.to_s.presence || ENV["FFMPEG_BIN"].to_s.presence || default_ffmpeg_bin
      @ffmpeg_bin = resolved_bin.to_s
      @seek_seconds = seek_seconds.to_f.positive? ? seek_seconds.to_f : DEFAULT_SEEK_SECONDS
    end

    # Extracts one frame as a JPEG.
    #
    # @param video_bytes [String] raw video data
    # @param reference_id [#to_s] used for temp file names and the output filename digest
    # @param content_type [String, nil] MIME type used to pick the temp file extension
    # @return [Hash] `{ ok:, image_bytes:, content_type:, filename:, metadata: }`;
    #   on failure `ok` is false and `metadata[:reason]` names the cause
    def extract_first_frame(video_bytes:, reference_id:, content_type: nil)
      return empty_result("video_bytes_missing") if video_bytes.blank?
      return empty_result("ffmpeg_missing") unless command_available?(@ffmpeg_bin)

      Tempfile.create([ "video_thumb_source_#{safe_reference(reference_id)}", extension_for(content_type: content_type) ]) do |video_file|
        video_file.binmode
        video_file.write(video_bytes)
        video_file.flush

        Tempfile.create([ "video_thumb_output_#{safe_reference(reference_id)}", ".jpg" ]) do |thumb_file|
          thumb_file.binmode
          _stdout, stderr, status = Open3.capture3(*thumbnail_command(video_file.path, thumb_file.path))
          return empty_result("ffmpeg_extract_failed", stderr: stderr.to_s) unless status.success?

          image_bytes = File.binread(thumb_file.path)
          return empty_result("thumbnail_missing") if image_bytes.blank?
          return empty_result("thumbnail_too_large") if image_bytes.bytesize > MAX_THUMBNAIL_BYTES

          digest = Digest::SHA256.hexdigest("#{reference_id}:#{image_bytes.byteslice(0, 128)}")
          {
            ok: true,
            image_bytes: image_bytes,
            content_type: "image/jpeg",
            filename: "video_thumb_#{digest[0, 12]}.jpg",
            metadata: {
              source: "ffmpeg",
              seek_seconds: @seek_seconds,
              bytes: image_bytes.bytesize
            }
          }
        end
      end
    rescue StandardError => e
      empty_result("thumbnail_extraction_error", stderr: e.message)
    end

    private

    # Builds the ffmpeg argv for grabbing one frame.
    #
    # `-y` is required: the output path comes from Tempfile.create, so the file
    # already exists when ffmpeg starts, and without `-y` ffmpeg declines to
    # overwrite an existing output file and exits with an error.
    def thumbnail_command(source_path, output_path)
      [
        @ffmpeg_bin,
        "-hide_banner",
        "-y",
        "-loglevel", "error",
        "-ss", format("%.2f", @seek_seconds),
        "-i", source_path,
        "-frames:v", "1",
        "-q:v", "3",
        "-f", "image2",
        output_path
      ]
    end

    # True when the shell can resolve the given executable.
    def command_available?(command)
      system("command -v #{Shellwords.escape(command)} >/dev/null 2>&1")
    end

    # Reduces an arbitrary reference to at most 32 filename-safe characters;
    # falls back to "video" when nothing is left.
    def safe_reference(value)
      value.to_s.gsub(/[^a-zA-Z0-9_-]/, "_").first(32).presence || "video"
    end

    # Prefers ~/.local/bin/ffmpeg when that file exists, else plain "ffmpeg".
    def default_ffmpeg_bin
      local_bin = File.expand_path("~/.local/bin/ffmpeg")
      return local_bin if File.exist?(local_bin)

      "ffmpeg"
    end

    # Temp file extension for the given MIME type; defaults to ".mp4".
    def extension_for(content_type:)
      value = content_type.to_s.downcase
      return ".mov" if value.include?("quicktime")
      return ".webm" if value.include?("webm")
      return ".mp4" if value.include?("mp4")

      ".mp4"
    end

    # Failure-shaped result: the reason, plus stderr when non-blank.
    def empty_result(reason, stderr: nil)
      {
        ok: false,
        image_bytes: nil,
        content_type: nil,
        filename: nil,
        metadata: {
          source: "ffmpeg",
          reason: reason,
          stderr: stderr.to_s.presence
        }.compact
      }
    end
  end

app/services/workspace/actions_todo_queue_service.rb

0.0% lines covered

100.0% branches covered

227 relevant lines. 0 lines covered and 227 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. require "set"
  module Workspace
    # Builds the workspace "actions to-do" queue for an account: recent profile
    # posts that have not been commented on yet, each annotated with its
    # comment suggestions and processing state. Optionally enqueues background
    # processing for posts that still need suggestions.
    class ActionsTodoQueueService
      DEFAULT_LIMIT = 30
      MAX_LIMIT = 120
      MAX_POST_AGE_DAYS = 5
      PRELOAD_MULTIPLIER = 8
      ENQUEUE_BATCH_SIZE = ENV.fetch("WORKSPACE_ACTIONS_ENQUEUE_BATCH_SIZE", 8).to_i.clamp(1, 30)
      NON_PROCESSABLE_STATUSES = %w[
        ready
        failed
        skipped_page_profile
        skipped_deleted_source
        skipped_non_user_post
      ].freeze
      # Sort weight per processing status; higher weights are listed first.
      STATUS_PRIORITY = {
        "failed" => 6,
        "running" => 5,
        "queued" => 4,
        "waiting_build_history" => 4,
        "waiting_profile_analysis" => 4,
        "waiting_post_analysis" => 3,
        "waiting_media_download" => 3,
        "queued_for_processing" => 2,
        "ready" => 1,
        "skipped_non_user_post" => 0,
        "skipped_deleted_source" => 0,
        "skipped_page_profile" => 0
      }.freeze
      # Profile tag names that mark a profile as a page rather than a person.
      PAGE_LIKE_TAGS = %w[page brand business company publisher].freeze

      # @param account the account whose profile posts are listed
      # @param limit [Integer] maximum items returned, clamped to 1..MAX_LIMIT
      # @param enqueue_processing [Boolean] whether to enqueue background jobs
      # @param now [Time] reference time for the recency cutoff and `refreshed_at`
      def initialize(account:, limit: DEFAULT_LIMIT, enqueue_processing: true, now: Time.current)
        @account = account
        @limit = limit.to_i.clamp(1, MAX_LIMIT)
        @enqueue_processing = truthy?(enqueue_processing)
        @now = now
        @profile_policy_cache = {}
      end

      # Builds the queue.
      #
      # @return [Hash] `{ items: [...], stats: {...} }`; `items` holds at most
      #   `limit` entries while the stats describe every eligible item
      def fetch!
        posts = candidate_posts
        return empty_result if posts.empty?

        sent_keys = commented_post_keys(profile_ids: posts.map(&:instagram_profile_id).uniq)
        items = posts.filter_map { |post| build_item(post: post, sent_keys: sent_keys) }
        return empty_result if items.empty?

        ordered = sort_items(items: items)
        enqueued_count = enqueue_processing_jobs(items: ordered) if @enqueue_processing

        {
          items: ordered.first(@limit),
          stats: {
            total_items: ordered.length,
            ready_items: ordered.count { |row| row[:suggestions].any? },
            processing_items: ordered.count { |row| row[:requires_processing] },
            enqueued_now: enqueued_count.to_i,
            refreshed_at: @now.iso8601(3)
          }
        }
      end

      private

      attr_reader :account, :limit, :now

      # Casts loosely typed flags ("1", "true", true, ...) to a boolean.
      def truthy?(value)
        ActiveModel::Type::Boolean.new.cast(value)
      end

      # Result returned when there is nothing to list.
      def empty_result
        {
          items: [],
          stats: {
            total_items: 0,
            ready_items: 0,
            processing_items: 0,
            enqueued_now: 0,
            refreshed_at: now.iso8601(3)
          }
        }
      end

      # Loads recent posts, newest first. The recency cutoff is derived from the
      # injected `now` so that the cutoff and `refreshed_at` share one clock.
      # Any load error yields an empty list.
      def candidate_posts
        scope_limit = [ limit * PRELOAD_MULTIPLIER, limit ].max
        cutoff = now - MAX_POST_AGE_DAYS.days

        account.instagram_profile_posts
          .includes(instagram_profile: :profile_tags, media_attachment: :blob, preview_image_attachment: :blob)
          .where("taken_at >= ?", cutoff)
          .order(taken_at: :desc, id: :desc)
          .limit(scope_limit)
          .to_a
      rescue StandardError
        []
      end

      # Turns a post into a queue item, or nil when the post is not eligible
      # (no profile, page-like profile, deleted at source, not a user-created
      # post, or already commented on).
      def build_item(post:, sent_keys:)
        profile = post.instagram_profile
        return nil unless profile
        return nil unless user_profile?(profile)
        return nil if source_deleted_post?(post)
        return nil unless user_created_post?(post)

        comment_key = "#{post.instagram_profile_id}:#{post.shortcode}"
        return nil if sent_keys.include?(comment_key)

        analysis = post.analysis.is_a?(Hash) ? post.analysis : {}
        metadata = post.metadata.is_a?(Hash) ? post.metadata : {}
        policy = metadata["comment_generation_policy"].is_a?(Hash) ? metadata["comment_generation_policy"] : {}
        workspace_state = metadata["workspace_actions"].is_a?(Hash) ? metadata["workspace_actions"] : {}
        suggestions = Array(analysis["comment_suggestions"]).map { |value| value.to_s.strip }.reject(&:blank?).uniq.first(3)
        processing_status = derive_processing_status(post: post, suggestions: suggestions, workspace_state: workspace_state, policy: policy)
        processing_message = derive_processing_message(processing_status: processing_status, workspace_state: workspace_state, policy: policy, post: post)

        {
          post: post,
          profile: profile,
          analysis: analysis,
          metadata: metadata,
          suggestions: suggestions,
          policy: policy,
          workspace_state: workspace_state,
          processing_status: processing_status,
          processing_message: processing_message,
          requires_processing: suggestions.empty? && !NON_PROCESSABLE_STATUSES.include?(processing_status.to_s),
          post_taken_at: post.taken_at,
          profile_last_active_at: profile.last_active_at
        }
      end

      # Status precedence: existing suggestions, then the stored workspace
      # status, then missing media, then running analysis, then a blocked
      # policy awaiting history, else "queued_for_processing".
      def derive_processing_status(post:, suggestions:, workspace_state:, policy:)
        return "ready" if suggestions.any?

        status = workspace_state["status"].to_s
        return status if status.present?
        return "waiting_media_download" unless post.media.attached?

        ai_status = post.ai_status.to_s
        return "waiting_post_analysis" if ai_status == "pending" || ai_status == "running"

        reason_code = policy["history_reason_code"].to_s
        if policy["status"].to_s == "blocked" && reason_code.in?(WorkspaceProcessActionsTodoPostJob::PROFILE_INCOMPLETE_REASON_CODES)
          return "waiting_build_history"
        end

        "queued_for_processing"
      end

      # Human-readable explanation for a processing status.
      def derive_processing_message(processing_status:, workspace_state:, policy:, post:)
        case processing_status.to_s
        when "ready"
          "Suggestions are ready."
        when "waiting_media_download"
          "Preview media download is queued."
        when "waiting_post_analysis"
          "Post analysis is running in background."
        when "waiting_build_history", "waiting_profile_analysis"
          "Build History is running; comment generation will resume automatically."
        when "running"
          "Preparing suggestions in background."
        when "queued"
          "Queued for background processing."
        when "failed"
          workspace_state["last_error"].to_s.presence || "Background processing failed. Will retry."
        when "skipped_page_profile"
          "Skipped because this account is classified as a page."
        when "skipped_deleted_source"
          "Skipped because this post was deleted from source."
        when "skipped_non_user_post"
          "Skipped because this row is not a user-created post."
        else
          if post.ai_status.to_s == "analyzed"
            policy["blocked_reason"].to_s.presence || "Awaiting comment suggestions."
          else
            "Queued for analysis and suggestion generation."
          end
        end
      end

      # Orders items by status weight, then profile activity, then post time,
      # all descending. Missing timestamps sort as the epoch.
      def sort_items(items:)
        items.sort_by do |item|
          [
            STATUS_PRIORITY[item[:processing_status].to_s].to_i,
            item[:profile_last_active_at] || Time.at(0),
            item[:post_taken_at] || Time.at(0)
          ]
        end.reverse
      end

      # Enqueues processing for the first ENQUEUE_BATCH_SIZE items that need
      # it. Best effort: a failure for one item does not stop the rest.
      #
      # @return [Integer] number of jobs reported as enqueued
      def enqueue_processing_jobs(items:)
        candidates = items.select { |item| item[:requires_processing] }.first(ENQUEUE_BATCH_SIZE)
        enqueued = 0
        candidates.each do |item|
          result = WorkspaceProcessActionsTodoPostJob.enqueue_if_needed!(
            account: account,
            profile: item[:profile],
            post: item[:post],
            requested_by: "workspace_actions_queue"
          )
          enqueued += 1 if truthy?(result[:enqueued])
        rescue StandardError
          next
        end
        enqueued
      end

      # False when the scan policy says to skip the profile or when it carries
      # a page-like tag. Errors count as "not a user profile".
      def user_profile?(profile)
        decision = cached_profile_decision(profile: profile)
        return false if truthy?(decision[:skip_post_analysis])

        tag_names = profile.profile_tags.map { |tag| tag.name.to_s.downcase }
        return false if tag_names.any? { |name| PAGE_LIKE_TAGS.include?(name) }

        true
      rescue StandardError
        false
      end

      # Scan-policy decision per profile, memoised for this service instance.
      def cached_profile_decision(profile:)
        @profile_policy_cache[profile.id] ||= Instagram::ProfileScanPolicy.new(profile: profile).decision
      end

      # True when the post metadata flags it as deleted from source.
      def source_deleted_post?(post)
        metadata = post.metadata.is_a?(Hash) ? post.metadata : {}
        truthy?(metadata["deleted_from_source"])
      end

      # False for rows whose metadata marks them as stories; errors count as
      # "not user-created".
      def user_created_post?(post)
        metadata = post.metadata.is_a?(Hash) ? post.metadata : {}
        post_kind = metadata["post_kind"].to_s.downcase
        return false if post_kind == "story"

        product_type = metadata["product_type"].to_s.downcase
        return false if product_type == "story"
        return false if truthy?(metadata["is_story"])

        true
      rescue StandardError
        false
      end

      # Set of "profile_id:shortcode" keys for posts that already have a
      # "post_comment_sent" event, looking at the 2,000 most recent events.
      # Errors yield an empty set.
      def commented_post_keys(profile_ids:)
        return Set.new if profile_ids.blank?

        events =
          InstagramProfileEvent
            .joins(:instagram_profile)
            .where(instagram_profiles: { instagram_account_id: account.id, id: profile_ids })
            .where(kind: "post_comment_sent")
            .order(detected_at: :desc, id: :desc)
            .limit(2_000)

        Set.new(
          events.filter_map do |event|
            shortcode = event.metadata.is_a?(Hash) ? event.metadata["post_shortcode"].to_s.strip : ""
            next if shortcode.blank?

            "#{event.instagram_profile_id}:#{shortcode}"
          end
        )
      rescue StandardError
        Set.new
      end
    end
  end

lib/tasks/story_debug_analyzer.rb

0.0% lines covered

100.0% branches covered

130 relevant lines. 0 lines covered and 130 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. #!/usr/bin/env ruby
  2. # Story Debug Analyzer
  3. # This script analyzes the captured HTML snapshots and debug data to identify story skipping issues
  4. require 'json'
  5. require 'fileutils'
  # Reads the story debug artifacts (HTML snapshots and raw reel JSON dumps)
  # from the application's tmp directory and prints a human-readable report to
  # stdout, highlighting stories that were skipped as "already processed".
  class StoryDebugAnalyzer
    # Marker text a snapshot contains when the story was skipped.
    ALREADY_PROCESSED_MARKER = 'Already Processed: true'

    def initialize
      @debug_dir = Rails.root.join('tmp', 'story_debug_snapshots')
      @reel_debug_dir = Rails.root.join('tmp', 'story_reel_debug')
    end

    # Runs every analysis section in order and prints the combined report.
    def analyze_all
      puts "=== Story Debug Analysis ==="
      puts "Analyzing captured data at #{Time.current}"
      puts
      analyze_html_snapshots
      analyze_reel_data
      generate_summary_report
    end

    private

    # Prints per-snapshot findings for every HTML file whose name matches
    # `<username>_story_<index>_<id>_<timestamp>.html`.
    def analyze_html_snapshots
      puts "--- HTML Snapshots Analysis ---"
      return unless Dir.exist?(@debug_dir)

      html_files = Dir.glob(File.join(@debug_dir, '*.html')).sort
      puts "Found #{html_files.size} HTML snapshot files"
      html_files.each do |file|
        match = File.basename(file).match(/^(.+)_story_(\d+)_(\d+)_(.+)\.html$/)
        next unless match

        username = match[1]
        story_index = match[2].to_i
        story_id = match[3]
        puts "\n📸 #{username} - Story #{story_index} (ID: #{story_id})"

        content = File.read(file)
        already_processed = content.include?(ALREADY_PROCESSED_MARKER)
        if already_processed
          puts " ⚠️ Story was marked as ALREADY PROCESSED"
        else
          puts " ✅ Story was processed normally"
        end

        # Story position within the reel, as recorded in the snapshot.
        position = content.match(/Story Index:\s*(\d+)\s*\/\s*(\d+)/)
        if position
          current_index = position[1].to_i
          total = position[2].to_i
          puts " 📊 Position: #{current_index}/#{total} stories"
          if current_index > 0 && already_processed
            puts " 🔍 ISSUE: Story #{current_index} was skipped but it's not the first story!"
          end
        end

        # Earlier upload events hint at duplicate processing.
        puts " 📝 Found previous story_upload events" if content.match?(/"kind":\s*"story_uploaded"/)
      end
      puts
    end

    # Prints findings for every reel dump named
    # `<username>_reel_<user_id>_<timestamp>.json`.
    def analyze_reel_data
      puts "--- Raw Reel Data Analysis ---"
      return unless Dir.exist?(@reel_debug_dir)

      json_files = Dir.glob(File.join(@reel_debug_dir, '*.json')).sort
      puts "Found #{json_files.size} reel data files"
      json_files.each do |file|
        match = File.basename(file).match(/^(.+)_reel_(\d+)_(.+)\.json$/)
        next unless match

        puts "\n🎥 #{match[1]} (User ID: #{match[2]})"
        report_reel_file(file)
      end
      puts
    end

    # Prints the counters and per-reel story IDs of one reel dump. Malformed
    # or partial dumps are reported (or skipped) rather than aborting the
    # whole analysis run.
    def report_reel_file(file)
      data = JSON.parse(File.read(file))
      unless data.is_a?(Hash)
        puts " ❌ Unexpected JSON structure: expected an object, got #{data.class}"
        return
      end

      puts " 📊 Items in reel: #{data['items_count']}"
      puts " 📊 Reels count: #{data['reels_count']}"
      puts " 📊 Reels media count: #{data['reels_media_count']}"

      # The raw response may be absent or have an unexpected shape.
      raw = data['raw_response']
      reels = raw.is_a?(Hash) ? raw['reels'] : nil
      return unless reels.is_a?(Hash)

      reels.each do |reel_id, reel_data|
        next unless reel_data.is_a?(Hash) && reel_data['items'].is_a?(Array)

        items = reel_data['items']
        puts " 📹 Reel #{reel_id}: #{items.size} items"
        story_ids = items.select { |item| item.is_a?(Hash) }.map { |item| item['pk'] || item['id'] }.compact
        puts " 🆔 Story IDs: #{story_ids.join(', ')}"
        puts " ⚠️ DUPLICATE STORY IDs DETECTED!" if story_ids.size != story_ids.uniq.size
      end
    rescue JSON::ParserError => e
      puts " ❌ Failed to parse JSON: #{e.message}"
    end

    # Prints totals, the skip rate and a fixed list of recommendations.
    def generate_summary_report
      puts "--- Summary Report ---"
      total_snapshots = 0
      skipped_stories = 0
      if Dir.exist?(@debug_dir)
        html_files = Dir.glob(File.join(@debug_dir, '*.html'))
        total_snapshots = html_files.size
        skipped_stories = html_files.count { |file| File.read(file).include?(ALREADY_PROCESSED_MARKER) }
      end

      puts "📊 Total story snapshots: #{total_snapshots}"
      puts "📊 Stories skipped: #{skipped_stories}"
      puts "📊 Stories processed: #{total_snapshots - skipped_stories}"
      if skipped_stories > 0
        skip_percentage = (skipped_stories.to_f / total_snapshots * 100).round(1)
        puts "⚠️ Skip rate: #{skip_percentage}%"
        if skip_percentage > 50
          puts "🚨 HIGH skip rate detected! This indicates a potential issue with story processing logic."
        elsif skip_percentage > 25
          puts "⚠️ Elevated skip rate detected. Review the skipping logic."
        end
      end

      puts
      puts "📁 Debug files location:"
      puts " HTML snapshots: #{@debug_dir}"
      puts " Raw reel data: #{@reel_debug_dir}"
      puts
      puts "💡 Recommendations:"
      puts " 1. Check if stories are being incorrectly marked as duplicates"
      puts " 2. Verify story_id uniqueness in the raw reel data"
      puts " 3. Review the already_processed_story? method logic"
      puts " 4. Consider using force_analyze_all: true to bypass skipping for testing"
    end
  end
  # Entry point: only analyse when this file is the program being run, not
  # when it is loaded by another file.
  StoryDebugAnalyzer.new.analyze_all if __FILE__ == $PROGRAM_NAME

lib/tasks/story_network_analyzer.rb

0.0% lines covered

100.0% branches covered

146 relevant lines. 0 lines covered and 146 lines missed.
0 total branches, 0 branches covered and 0 branches missed.
    
  1. #!/usr/bin/env ruby
  2. require "json"
  3. require "uri"
  # Summarises captured browser network logs: which endpoints were requested,
  # with which response statuses, and which requests look story-related
  # (GraphQL signatures and REST endpoints).
  class StoryNetworkAnalyzer
    DEBUG_GLOB = "log/instagram_debug/**/*.json".freeze

    # @param debug_glob [String] glob, relative to Rails.root, of the debug dumps
    def initialize(debug_glob: DEBUG_GLOB)
      @debug_glob = debug_glob
    end

    # Scans every matching dump and builds the report.
    #
    # @return [Hash] generated_at, files_scanned, top_endpoints (at most 80),
    #   story_graphql_signatures and story_api_endpoints, each sorted by count
    def analyze!
      files = Dir.glob(Rails.root.join(@debug_glob).to_s).sort
      tallies = new_tallies
      files.each { |file| scan_file(file, tallies) }
      build_report(files, tallies)
    end

    private

    # Fresh counters shared by all scanned files.
    def new_tallies
      {
        endpoint_counts: Hash.new(0),
        endpoint_statuses: Hash.new { |h, k| h[k] = Hash.new(0) },
        story_graphql_counts: Hash.new(0),
        story_graphql_samples: Hash.new { |h, k| h[k] = [] },
        story_api_counts: Hash.new(0),
        story_api_samples: Hash.new { |h, k| h[k] = [] }
      }
    end

    # Processes the "performance_logs" entries of one dump. Files that are not
    # JSON objects or carry no log array are ignored.
    def scan_file(file, tallies)
      json = parse_json(File.read(file))
      return unless json.is_a?(Hash)

      logs = json["performance_logs"]
      return unless logs.is_a?(Array)

      # Request id => details seen so far; ids are only matched within a file.
      request_meta = Hash.new { |h, k| h[k] = {} }
      logs.each { |entry| scan_entry(entry, file, request_meta, tallies) }
    end

    # Decodes one log entry (a JSON string wrapping the protocol message) and
    # dispatches on the protocol method name.
    def scan_entry(entry, file, request_meta, tallies)
      raw = entry.is_a?(Hash) ? entry["message"].to_s : ""
      outer = parse_json(raw)
      inner = outer.is_a?(Hash) ? outer["message"] : nil
      return unless inner.is_a?(Hash)

      params = inner["params"].is_a?(Hash) ? inner["params"] : {}
      request_id = params["requestId"].to_s

      case inner["method"].to_s
      when "Network.requestWillBeSent"
        handle_request(params, request_id, file, request_meta, tallies)
      when "Network.requestWillBeSentExtraInfo"
        handle_request_extra_info(params, request_id, file, request_meta, tallies)
      when "Network.responseReceived"
        handle_response(params, request_id, request_meta, tallies)
      end
    end

    # Counts the requested endpoint and tracks story REST endpoints.
    def handle_request(params, request_id, file, request_meta, tallies)
      request = params["request"].is_a?(Hash) ? params["request"] : {}
      url = request["url"].to_s
      return if url.blank?

      endpoint = normalize_endpoint(url)
      return if endpoint.blank?

      record_endpoint(tallies[:endpoint_counts], request_meta, request_id, endpoint)
      return unless story_api_endpoint?(endpoint)

      tallies[:story_api_counts][endpoint] += 1
      add_sample(tallies[:story_api_samples][endpoint], file)
    end

    # Uses the raw request headers to count the endpoint (when not already
    # counted for this request) and to detect story GraphQL signatures.
    def handle_request_extra_info(params, request_id, file, request_meta, tallies)
      headers = params["headers"].is_a?(Hash) ? params["headers"] : {}
      path = header_value(headers, ":path")
      friendly = header_value(headers, "x-fb-friendly-name")
      root_field = header_value(headers, "x-root-field-name")
      endpoint = normalize_endpoint(path)

      record_endpoint(tallies[:endpoint_counts], request_meta, request_id, endpoint) if endpoint.present?
      return unless story_graphql_signature?(friendly: friendly, root_field: root_field)

      key = [ endpoint.presence || "(unknown_path)", friendly, root_field ]
      tallies[:story_graphql_counts][key] += 1
      add_sample(tallies[:story_graphql_samples][key], file)
    end

    # Attributes the response status to the endpoint recorded for the request.
    def handle_response(params, request_id, request_meta, tallies)
      status = params.dig("response", "status").to_i
      endpoint = request_meta.dig(request_id, :endpoint)
      return if endpoint.blank?

      tallies[:endpoint_statuses][endpoint][status] += 1
    end

    # Counts an endpoint once per request id.
    #
    # The request event and its extra-info event share a request id and
    # normally resolve to the same endpoint, so counting both would double the
    # total. A different endpoint under the same id is counted as a new hit.
    # Events without a request id cannot be matched and are always counted.
    def record_endpoint(endpoint_counts, request_meta, request_id, endpoint)
      if request_id.to_s.strip.empty?
        endpoint_counts[endpoint] += 1
        return
      end

      meta = request_meta[request_id]
      return if meta[:endpoint] == endpoint

      meta[:endpoint] = endpoint
      endpoint_counts[endpoint] += 1
    end

    # Assembles the final report hash from the collected tallies.
    def build_report(files, tallies)
      {
        generated_at: Time.current.utc.iso8601(3),
        files_scanned: files.length,
        top_endpoints: sort_hash(tallies[:endpoint_counts]).first(80).map do |endpoint, count|
          {
            endpoint: endpoint,
            count: count,
            statuses: sort_hash(tallies[:endpoint_statuses][endpoint]).to_h
          }
        end,
        story_graphql_signatures: sort_hash(tallies[:story_graphql_counts]).map do |(endpoint, friendly, root_field), count|
          {
            endpoint: endpoint,
            friendly_name: friendly,
            root_field: root_field,
            count: count,
            sample_files: tallies[:story_graphql_samples][[ endpoint, friendly, root_field ]]
          }
        end,
        story_api_endpoints: sort_hash(tallies[:story_api_counts]).map do |endpoint, count|
          {
            endpoint: endpoint,
            count: count,
            sample_files: tallies[:story_api_samples][endpoint]
          }
        end
      }
    end

    # Parses JSON, returning nil instead of raising on bad input.
    def parse_json(raw)
      JSON.parse(raw)
    rescue StandardError
      nil
    end

    # Reduces a URL to "path?query"; non-URL values are returned stripped.
    # Blank or unparsable input yields "".
    def normalize_endpoint(value)
      raw = value.to_s.strip
      return "" if raw.blank?

      if raw.start_with?("http://", "https://")
        uri = URI.parse(raw)
        path = uri.path.to_s
        query = uri.query.to_s
        query.present? ? "#{path}?#{query}" : path
      else
        raw
      end
    rescue StandardError
      ""
    end

    # Looks a header up by the given key, then lower-cased, then upper-cased.
    def header_value(headers, key)
      return "" unless headers.is_a?(Hash)

      headers[key].to_s.presence ||
        headers[key.downcase].to_s.presence ||
        headers[key.upcase].to_s.presence ||
        ""
    end

    # True when the GraphQL friendly name or root field looks story-related.
    def story_graphql_signature?(friendly:, root_field:)
      friendly_s = friendly.to_s
      root_s = root_field.to_s
      friendly_s.include?("StoriesV3") ||
        root_s.include?("__stories__") ||
        root_s.include?("__reels_") ||
        root_s.include?("__feed__reels")
    end

    # True when the endpoint path looks like a story REST endpoint.
    def story_api_endpoint?(endpoint)
      endpoint_s = endpoint.to_s
      endpoint_s.include?("/api/v1/feed/reels_media/") ||
        endpoint_s.include?("/api/v1/stories/") ||
        endpoint_s.include?("/api/v1/story_interactions/") ||
        endpoint_s.include?("/api/v1/direct_v2/threads/broadcast/reel_share/") ||
        endpoint_s.include?("/stories/")
    end

    # Remembers up to three distinct sample files (relative to Rails.root).
    def add_sample(sample_array, file)
      return unless sample_array.is_a?(Array)

      relative = Pathname.new(file).relative_path_from(Rails.root).to_s
      sample_array << relative unless sample_array.include?(relative)
      sample_array.slice!(3..-1) if sample_array.length > 3
    end

    # Hash entries as [key, value] pairs, highest value first.
    def sort_hash(hash)
      hash.to_a.sort_by { |(_, value)| -value.to_i }
    end
  end
  147. end